diff options
Diffstat (limited to 'external/hash')
37 files changed, 4442 insertions, 0 deletions
diff --git a/external/hash/.gitignore b/external/hash/.gitignore new file mode 100644 index 0000000..a007fea --- /dev/null +++ b/external/hash/.gitignore @@ -0,0 +1 @@ +build/* diff --git a/external/hash/CMakeLists.txt b/external/hash/CMakeLists.txt new file mode 100644 index 0000000..7b7d990 --- /dev/null +++ b/external/hash/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required (VERSION 3.0.2) + +project (HashTest) + +SET(CMAKE_C_FLAGS_DEBUG "-g") +SET(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG") + +add_executable (hash_test hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c cmetrohash64.c) +add_executable (hash_test_32 hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c PMurHash.c) +add_executable (hash_test_rh hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c cmetrohash64.c) + +target_compile_definitions(hash_test_32 PRIVATE + -DHT_HASH_32) +target_compile_definitions(hash_test_rh PRIVATE + -DSTR_SET_RH -DTOKEN_MAP_RH) + +add_executable (load_test load_test.c ptr_set.c) +# robin hood hash table +add_executable (load_test_rh load_test.c ptr_set.c) + +target_compile_definitions(load_test PRIVATE + -DPTR_SET_INT_HASH) +target_compile_definitions(load_test_rh PRIVATE + -DPTR_SET_RH -DPTR_SET_INT_HASH) + +# default hash function +add_executable (load_test_d load_test.c ptr_set.c cmetrohash64.c) +add_executable (load_test_d_rh load_test.c ptr_set.c cmetrohash64.c) +target_compile_definitions(load_test_rh PRIVATE + -DPTR_SET_RH) + +add_test(hash_test hash_test) +add_test(hash_test_32 hash_test_32) +add_test(hash_test_rh hash_test_rh) +add_test(load_test load_test) +add_test(load_test_rh load_test_rh) + +enable_testing() diff --git a/external/hash/LICENSE b/external/hash/LICENSE new file mode 100644 index 0000000..a561b5f --- /dev/null +++ b/external/hash/LICENSE @@ -0,0 +1,28 @@ +This license applies to the content of the current directory. + +Some sources are externally provided - see respective file headers. +All source is MIT or public domain with varying copyright. + +Unless otherwise stated, the following license apply: + +The MIT License (MIT) + +Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/external/hash/PMurHash.c b/external/hash/PMurHash.c new file mode 100644 index 0000000..7284434 --- /dev/null +++ b/external/hash/PMurHash.c @@ -0,0 +1,334 @@ +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This implementation was written by Shane Day, and is also public domain. + * + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) + * with support for progressive processing. + */ + +/*----------------------------------------------------------------------------- + +If you want to understand the MurmurHash algorithm you would be much better +off reading the original source. Just point your browser at: +http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + + +What this version provides? + +1. Progressive data feeding. Useful when the entire payload to be hashed +does not fit in memory or when the data is streamed through the application. +Also useful when hashing a number of strings with a common prefix. A partial +hash of a prefix string can be generated and reused for each suffix string. + +2. Portability. Plain old C so that it should compile on any old compiler. +Both CPU endian and access-alignment neutral, but avoiding inefficient code +when possible depending on CPU capabilities. + +3. Drop in. I personally like nice self contained public domain code, making it +easy to pilfer without loads of refactoring to work properly in the existing +application code & makefile structure and mucking around with licence files. +Just copy PMurHash.h and PMurHash.c and you're ready to go. + + +How does it work? + +We can only process entire 32 bit chunks of input, except for the very end +that may be shorter. So along with the partial hash we need to give back to +the caller a carry containing up to 3 bytes that we were unable to process. +This carry also needs to record the number of bytes the carry holds. I use +the low 2 bits as a count (0..3) and the carry bytes are shifted into the +high byte in stream order. + +To handle endianess I simply use a macro that reads a uint32_t and define +that macro to be a direct read on little endian machines, a read and swap +on big endian machines, or a byte-by-byte read if the endianess is unknown. + +-----------------------------------------------------------------------------*/ + + +#include "PMurHash.h" + +/* I used ugly type names in the header to avoid potential conflicts with + * application or system typedefs & defines. Since I'm not including any more + * headers below here I can rename these so that the code reads like C99 */ +#undef uint32_t +#define uint32_t MH_UINT32 +#undef uint8_t +#define uint8_t MH_UINT8 + +/* MSVC warnings we choose to ignore */ +#if defined(_MSC_VER) + #pragma warning(disable: 4127) /* conditional expression is constant */ +#endif + +/*----------------------------------------------------------------------------- + * Endianess, misalignment capabilities and util macros + * + * The following 3 macros are defined in this section. The other macros defined + * are only needed to help derive these 3. + * + * READ_UINT32(x) Read a little endian unsigned 32-bit int + * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries + * ROTL32(x,r) Rotate x left by r bits + */ + +/* Convention is to define __BYTE_ORDER == to one of these values */ +#if !defined(__BIG_ENDIAN) + #define __BIG_ENDIAN 4321 +#endif +#if !defined(__LITTLE_ENDIAN) + #define __LITTLE_ENDIAN 1234 +#endif + +/* I386 */ +#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) + #define __BYTE_ORDER __LITTLE_ENDIAN + #define UNALIGNED_SAFE +#endif + +/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __), + * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ +#if !defined(__BYTE_ORDER) + #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1 + #define __BYTE_ORDER __LITTLE_ENDIAN + #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1 + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ +#if !defined(__BYTE_ORDER) + #if defined(__ARMEL__) || defined(__MIPSEL__) + #define __BYTE_ORDER __LITTLE_ENDIAN + #endif + #if defined(__ARMEB__) || defined(__MIPSEB__) + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* Now find best way we can to READ_UINT32 */ +#if __BYTE_ORDER==__LITTLE_ENDIAN + /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ + #define READ_UINT32(ptr) (*((uint32_t*)(ptr))) +#elif __BYTE_ORDER==__BIG_ENDIAN + /* TODO: Add additional cases below where a compiler provided bswap32 is available */ + #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) + #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr)))) + #else + /* Without a known fast bswap32 we're just as well off doing this */ + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + #define UNALIGNED_SAFE + #endif +#else + /* Unknown endianess so last resort is to read individual bytes */ + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + + /* Since we're not doing word-reads we can skip the messing about with realignment */ + #define UNALIGNED_SAFE +#endif + +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) +#endif + + +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +#define C1 (0xcc9e2d51) +#define C2 (0x1b873593) + +/* This is the main processing body of the algorithm. It operates + * on each full 32-bits of input. */ +#define DOBLOCK(h1, k1) do{ \ + k1 *= C1; \ + k1 = ROTL32(k1,15); \ + k1 *= C2; \ + \ + h1 ^= k1; \ + h1 = ROTL32(h1,13); \ + h1 = h1*5+0xe6546b64; \ + }while(0) + + +/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ +/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ +#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \ + int _i = cnt; \ + while(_i--) { \ + c = c>>8 | *ptr++<<24; \ + n++; len--; \ + if(n==4) { \ + DOBLOCK(h1, c); \ + n = 0; \ + } \ + } }while(0) + +/*---------------------------------------------------------------------------*/ + +/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed + * if wanted. Both ph1 and pcarry are required arguments. */ +void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len) +{ + uint32_t h1 = *ph1; + uint32_t c = *pcarry; + + const uint8_t *ptr = (uint8_t*)key; + const uint8_t *end; + + /* Extract carry count from low 2 bits of c value */ + int n = c & 3; + +#if defined(UNALIGNED_SAFE) + /* This CPU handles unaligned word access */ + + /* Consume any carry bytes */ + int i = (4-n) & 3; + if(i && i <= len) { + DOBYTES(i, h1, c, n, ptr, len); + } + + /* Process 32-bit chunks */ + end = ptr + len/4*4; + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = READ_UINT32(ptr); + DOBLOCK(h1, k1); + } + +#else /*UNALIGNED_SAFE*/ + /* This CPU does not handle unaligned word access */ + + /* Consume enough so that the next data byte is word aligned */ + int i = -(long)ptr & 3; + if(i && i <= len) { + DOBYTES(i, h1, c, n, ptr, len); + } + + /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ + end = ptr + len/4*4; + switch(n) { /* how many bytes in c */ + case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = READ_UINT32(ptr); + DOBLOCK(h1, k1); + } + break; + case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>24; + c = READ_UINT32(ptr); + k1 |= c<<8; + DOBLOCK(h1, k1); + } + break; + case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>16; + c = READ_UINT32(ptr); + k1 |= c<<16; + DOBLOCK(h1, k1); + } + break; + case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>8; + c = READ_UINT32(ptr); + k1 |= c<<24; + DOBLOCK(h1, k1); + } + } +#endif /*UNALIGNED_SAFE*/ + + /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ + len -= len/4*4; + + /* Append any remaining bytes into carry */ + DOBYTES(len, h1, c, n, ptr, len); + + /* Copy out new running hash and carry */ + *ph1 = h1; + *pcarry = (c & ~0xff) | n; +} + +/*---------------------------------------------------------------------------*/ + +/* Finalize a hash. To match the original Murmur3A the total_length must be provided */ +uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length) +{ + uint32_t k1; + int n = carry & 3; + if(n) { + k1 = carry >> (4-n)*8; + k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1; + } + h ^= total_length; + + /* fmix */ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +/*---------------------------------------------------------------------------*/ + +/* Murmur3A compatable all-at-once */ +uint32_t PMurHash32(uint32_t seed, const void *key, int len) +{ + uint32_t h1=seed, carry=0; + PMurHash32_Process(&h1, &carry, key, len); + return PMurHash32_Result(h1, carry, len); +} + +/*---------------------------------------------------------------------------*/ + +/* Provide an API suitable for smhasher */ +void PMurHash32_test(const void *key, int len, uint32_t seed, void *out) +{ + uint32_t h1=seed, carry=0; + const uint8_t *ptr = (uint8_t*)key; + const uint8_t *end = ptr + len; + +#if 0 /* Exercise the progressive processing */ + while(ptr < end) { + //const uint8_t *mid = ptr + rand()%(end-ptr)+1; + const uint8_t *mid = ptr + (rand()&0xF); + mid = mid<end?mid:end; + PMurHash32_Process(&h1, &carry, ptr, mid-ptr); + ptr = mid; + } +#else + PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr)); +#endif + h1 = PMurHash32_Result(h1, carry, len); + *(uint32_t*)out = h1; +} + +/*---------------------------------------------------------------------------*/ +#ifdef TEST +int main() { + // http://www.cprover.org/cbmc/ + // cbmc PMurHash.c --function PMurHash32 --unwind 255 --bounds-check --pointer-check + //=> seed=308736u (00000000000001001011011000000000) + // key=INVALID-128 (1000000011111111111111111111111111111111111111111111110101100111) + // len=640 + // Violated property: + //file PMurHash.c line 201 function PMurHash32_Process + //dereference failure: object bounds + //!(POINTER_OFFSET(ptr) < 0) && OBJECT_SIZE(ptr) >= 1 + POINTER_OFFSET(ptr) || DYNAMIC_OBJECT(ptr) + + uint32_t seed = 308736; + unsigned long long key = 0x80fffffffffffd67ULL; + PMurHash32(seed, &key, sizeof(key)); +} +#endif diff --git a/external/hash/PMurHash.h b/external/hash/PMurHash.h new file mode 100644 index 0000000..28ead00 --- /dev/null +++ b/external/hash/PMurHash.h @@ -0,0 +1,64 @@ +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This implementation was written by Shane Day, and is also public domain. + * + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) + * with support for progressive processing. + */ + +/* ------------------------------------------------------------------------- */ +/* Determine what native type to use for uint32_t */ + +/* We can't use the name 'uint32_t' here because it will conflict with + * any version provided by the system headers or application. */ + +/* First look for special cases */ +#if defined(_MSC_VER) + #define MH_UINT32 unsigned long +#endif + +/* If the compiler says it's C99 then take its word for it */ +#if !defined(MH_UINT32) && ( \ + defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L ) + #include <stdint.h> + #define MH_UINT32 uint32_t +#endif + +/* Otherwise try testing against max value macros from limit.h */ +#if !defined(MH_UINT32) + #include <limits.h> + #if (USHRT_MAX == 0xffffffffUL) + #define MH_UINT32 unsigned short + #elif (UINT_MAX == 0xffffffffUL) + #define MH_UINT32 unsigned int + #elif (ULONG_MAX == 0xffffffffUL) + #define MH_UINT32 unsigned long + #endif +#endif + +#if !defined(MH_UINT32) + #error Unable to determine type name for unsigned 32-bit int +#endif + +/* I'm yet to work on a platform where 'unsigned char' is not 8 bits */ +#define MH_UINT8 unsigned char + + +/* ------------------------------------------------------------------------- */ +/* Prototypes */ + +#ifdef __cplusplus +extern "C" { +#endif + +void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len); +MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length); +MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len); + +void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out); + +#ifdef __cplusplus +} +#endif diff --git a/external/hash/README.md b/external/hash/README.md new file mode 100644 index 0000000..d0f03e8 --- /dev/null +++ b/external/hash/README.md @@ -0,0 +1,158 @@ +Generic hash table implementation with focus on being minimally +invasive on existing items to be indexed. + +The key is stored arbitrarily in the referenced item. A custom match +function `HT_MATCH` provides the necessary abstraction. Items are +NOT allocated by the hash table. + +Removed items are replaced with a sentinel value (1) to preserve +chaining. + +See the example implementations `hash_set.h`, `hash_item_table.h`, +and `hash_test.c`. + +The hash function can also be customized, see the default below. + +In all cases the key as assumed to be char string that is not +(necessarily) zero terminated. The length is given separately. Keys +can therefore be arbitrary binary values of arbitrary length. + +Instead of initializing the hash table, it may be zeroed. In that +case the count defaults to 4 upon first insert, meaning it can hold +up to 4 items before resizing or less depending on load factor. By +zeroing memory, hash tables use no memory until actually used. + +For increased portability we do not rely upon `stdint.h` outside the +default hash function. + +Build +----- + +There are no special build requirements. + +CMakeLists.txt simply links the appropriate hash function with the test +files, but CMake is not required, for example: + + cc load_test.c ptr_set.c cmetrohash64.c -O4 -DNDEBUG -o load_test + +There are several significant flags that can be set, but look at +`CMakeLists.txt`, `hash_test.c`, and `load_test.c`. + +`initbuild.sh` is an easy way to create a CMake Ninja build for +platforms that support it. + +Usage +----- + +The hash table is implemented in a generic form with a static (private) +interface. The macros + +`HASH_TABLE_HEADER(name, item)` defines the public prototype for the +specialized type, and `HASH_TABLE_API(name)` defines non-static wrapper +functions to access the generic implementation. This avoids creating all +the code as macros which are painful to develop and debug. + +See `token_map.h`, `token_map.c` which are used in `hash_test.c`. + +If the datatype is only needed in one file, the implementation such as +`token_map.c` can be included after defining `HT_PRIVATE`. This gives +the compiler better optimization opportunities and hides the interface +from other compilation units. + +The basic datatype `hash_table_t` is a small struct that can be embedded +anywhere and used as the instance of any hash table derived type. + + +Note on algorithmic choice +-------------------------- + +We use linear or quadratic probing hash tables because it allows for +many small hash tables. We overallocate the hash table by a factor 2 +(default) but only store a single pointer per item. This probing does +not allow for dense tables by itself, but because the hash table only +stores a single pointer per bucket, we can afford a larger table. +Advanced hashing such as Hopscotch can pack much more densely but +e.g. Hopscotch need to store a bitmask, thus already doubling the +size of the table. Hopscotch is probably good, but more complex and +depends on optimizing bit scan insructions, furthermore, when the use +case is many small tables such as symbol table scopes, cache locality +is less relevant. Chained hashing with 50% load factor is a good +choice, but require intrusive links, and cannot trivially hash string +sets without extra allocation. There is some evidence that linear +probing may be faster than quadratic probing due to cache effects, as +long as we do not pack too densely - however, the tradional quadratic +probing (k + i * i) modulo prime does not cover all buckets. We use +(k + i * (i + 1) / 2) modulo power of 2 which covers all buckets so +without experimentation it is unclear whether linear probing or +quadratic probing is best. + +The use of open addressing leads to more key comparisons than chained +hashing. The fact we store the keys indirectly in the stored item is +also not ideal, except when the item is also directly the key. If we +use larger hash tables from the saved space, we suspect this will +still perform well, also considering external factors such as not +having to allocate and copy a key from e.g. a text buffer being +parsed. + +It is generally understood that linear probing degrades significantly +with a load factor above 0.7. In this light, it is interesting to note +that Emmanuel Goossaert tested hopscotch hashing and found that bucket +swaps only take place in significance above a load factor of 0.7. A +commenter to Goossaert's blog also found that neighbourhoods rarely +exceed 64 even when allowed to grow on demand. Without deep analysis +it would appear that linear probing and hopscotch is pretty similar +at a load factor of 0.5 especially if tombstones are not present. +Because hopscotch requires extra data (e.g. the hash key or a bitmap +or a linked list) this confirms our intuition that it is better with +lower load factors and smaller buckets, than advanced algorithms. +Furthermore, hopscotch insert degrades badly when it needs to search for +empty buckets at high load factors. Of course, for on disk storage +it is a different matter, and this is why Goossaert is interested +in caching hash keys in buckets. + +Robin Hood hashing is mostly interesting when there are many deletions +to clean up and when the load factor increases. In our implementation we +try to keep the per bucket size small: a pointer and a 8 bit offset, or +just a pointer for the linear and quadratic probing implementations. +This makes it affordable with a lower load factor. + +This Robin Hood variation stores the offset from the hashed bucket to +where the first entry is stored. This means we can avoiding sampling any +bucket not indexed by the current hash key, and it also means that we +avoid having to store or calculate the hash key when updating. + +A sorted Robin Hood hashing implementation was also made, but it prooved +to be error prone with many special cases and slower than regular Robin +Hood hashing. It would conceivably protect against hash collision +attacks through exponential search, but insertions and deletions would +still need to move memory in linear time, making this point mood. +Therefore the sorted Robin Hood variant has been removed. + + +Section 4.5: +<http://codecapsule.com/2014/05/07/implementing-a-key-value-store-part-6-open-addressing-hash-tables/> + +<http://codecapsule.com/2013/08/11/hopscotch-hashing/> + +Source file references +---------------------- + +<http://www.jandrewrogers.com/2015/05/27/metrohash/> + +downloaded from + + <https://github.com/rurban/smhasher> + <https://github.com/rurban/smhasher/commit/00a4e5ab6bfb7b25bd3c7cf915f68984d4910cfd> + + <https://raw.githubusercontent.com/rurban/smhasher/master/cmetrohash64.c> + <https://raw.githubusercontent.com/rurban/smhasher/master/cmetrohash.h> + <https://raw.githubusercontent.com/rurban/smhasher/master/PMurHash.c> + <https://raw.githubusercontent.com/rurban/smhasher/master/PMurHash.h> + +As of July 2015, for 64-bit hashes, the C port of the 64 bit metro hash +is a good trade-off between speed and simplicity. The For a 32-bit C hash +function, the ported MurmurHash3 is safe and easy to use in this +environment, but xxHash32 may also be worth considering. + +See also <http://www.strchr.com/hash_functions> + diff --git a/external/hash/cmetrohash.h b/external/hash/cmetrohash.h new file mode 100644 index 0000000..b2c869a --- /dev/null +++ b/external/hash/cmetrohash.h @@ -0,0 +1,78 @@ +// metrohash.h +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Updated Nov. 2015 to use safe unaligned reads and platform neutral +// hash. This WILL change hashes on big endian platfors. / mikkelfj +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef CMETROHASH_METROHASH_H +#define CMETROHASH_METROHASH_H + +#include "ht_portable.h" +#include "unaligned.h" + +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + +#include <stdint.h> +#include <string.h> + +// MetroHash 64-bit hash functions +void cmetrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void cmetrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +/* rotate right idiom recognized by compiler*/ +inline static uint64_t crotate_right(uint64_t v, unsigned k) +{ + return (v >> k) | (v << (64 - k)); +} + +inline static uint64_t cread_u64(const void * const ptr) +{ + return (uint64_t)unaligned_read_le64toh(ptr); +} + +inline static uint64_t cread_u32(const void * const ptr) +{ + return (uint64_t)unaligned_read_le32toh(ptr); +} + +inline static uint64_t cread_u16(const void * const ptr) +{ + return (uint64_t)unaligned_read_le16toh(ptr); +} + +inline static uint64_t cread_u8 (const void * const ptr) +{ + return * (uint8_t *) ptr; +} + +#if defined (__cplusplus) +} +#endif +#endif // #ifndef CMETROHASH_METROHASH_H diff --git a/external/hash/cmetrohash64.c b/external/hash/cmetrohash64.c new file mode 100644 index 0000000..2923958 --- /dev/null +++ b/external/hash/cmetrohash64.c @@ -0,0 +1,185 @@ +// metrohash64.cpp +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "cmetrohash.h" + + +void cmetrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t * ptr = key; + const uint8_t * const end = ptr + len; + + uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len; + + if (len >= 32) + { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do + { + v[0] += cread_u64(ptr) * k0; ptr += 8; v[0] = crotate_right(v[0],29) + v[2]; + v[1] += cread_u64(ptr) * k1; ptr += 8; v[1] = crotate_right(v[1],29) + v[3]; + v[2] += cread_u64(ptr) * k2; ptr += 8; v[2] = crotate_right(v[2],29) + v[0]; + v[3] += cread_u64(ptr) * k3; ptr += 8; v[3] = crotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= crotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; + v[3] ^= crotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; + v[0] ^= crotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; + v[1] ^= crotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0, v1; + v0 = hash + (cread_u64(ptr) * k0); ptr += 8; v0 = crotate_right(v0,33) * k1; + v1 = hash + (cread_u64(ptr) * k1); ptr += 8; v1 = crotate_right(v1,33) * k2; + v0 ^= crotate_right(v0 * k0, 35) + v1; + v1 ^= crotate_right(v1 * k3, 35) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) + { + hash += cread_u64(ptr) * k3; ptr += 8; + hash ^= crotate_right(hash, 33) * k1; + + } + + if ((end - ptr) >= 4) + { + hash += cread_u32(ptr) * k3; ptr += 4; + hash ^= crotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) + { + hash += cread_u16(ptr) * k3; ptr += 2; + hash ^= crotate_right(hash, 13) * k1; + } + + if ((end - ptr) >= 1) + { + hash += cread_u8 (ptr) * k3; + hash ^= crotate_right(hash, 25) * k1; + } + + hash ^= crotate_right(hash, 33); + hash *= k0; + hash ^= crotate_right(hash, 33); + + memcpy(out, &hash, 8); +} + + +void cmetrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + const uint8_t * ptr = key; + const uint8_t * const end = ptr + len; + + uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len; + + if (len >= 32) + { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do + { + v[0] += cread_u64(ptr) * k0; ptr += 8; v[0] = crotate_right(v[0],29) + v[2]; + v[1] += cread_u64(ptr) * k1; ptr += 8; v[1] = crotate_right(v[1],29) + v[3]; + v[2] += cread_u64(ptr) * k2; ptr += 8; v[2] = crotate_right(v[2],29) + v[0]; + v[3] += cread_u64(ptr) * k3; ptr += 8; v[3] = crotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= crotate_right(((v[0] + v[3]) * k0) + v[1], 30) * k1; + v[3] ^= crotate_right(((v[1] + v[2]) * k1) + v[0], 30) * k0; + v[0] ^= crotate_right(((v[0] + v[2]) * k0) + v[3], 30) * k1; + v[1] ^= crotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0, v1; + v0 = hash + (cread_u64(ptr) * k2); ptr += 8; v0 = crotate_right(v0,29) * k3; + v1 = hash + (cread_u64(ptr) * k2); ptr += 8; v1 = crotate_right(v1,29) * k3; + v0 ^= crotate_right(v0 * k0, 34) + v1; + v1 ^= crotate_right(v1 * k3, 34) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) + { + hash += cread_u64(ptr) * k3; ptr += 8; + hash ^= crotate_right(hash, 36) * k1; + } + + if ((end - ptr) >= 4) + { + hash += cread_u32(ptr) * k3; ptr += 4; + hash ^= crotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) + { + hash += cread_u16(ptr) * k3; ptr += 2; + hash ^= crotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 1) + { + hash += cread_u8 (ptr) * k3; + hash ^= crotate_right(hash, 23) * k1; + } + + hash ^= crotate_right(hash, 28); + hash *= k0; + hash ^= crotate_right(hash, 29); + + memcpy(out, &hash, 8); +} + + diff --git a/external/hash/hash.h b/external/hash/hash.h new file mode 100644 index 0000000..c5a6fc6 --- /dev/null +++ b/external/hash/hash.h @@ -0,0 +1,115 @@ +#ifndef HASH_H +#define HASH_H + +/* Misc. hash functions that do not comply to a specific interface. */ + +#include <stdlib.h> + +#ifdef _MSC_VER +/* `inline` only advisory anyway. */ +#pragma warning(disable: 4710) /* function not inlined */ +#endif + +static inline uint32_t hash_fnv1a32_update(uint32_t seed, uint8_t *buf, size_t len) +{ + uint8_t *p = buf; +#ifndef FNV1A_NOMUL + const uint64_t prime = UINT32_C(0x1000193); +#endif + uint64_t hash = seed; + + while (len--) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 7) + + (hash << 8) + (hash << 24); +#endif + } + return hash; +} + +static inline uint32_t hash_fnv1a32(uint8_t *buf, size_t len) +{ + return hash_fnv1a32_update(UINT32_C(0x811c9dc5), buf, len); +} + +static inline uint64_t hash_fnv1a64_update(uint64_t v, uint8_t *buf, size_t len) +{ + uint8_t *p = buf; +#ifndef FNV1A_NOMUL + const uint64_t prime = UINT64_C(0x100000001b3); +#endif + uint64_t hash = v; + + while (len--) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 5) + + (hash << 7) + (hash << 8) + (hash << 40); +#endif + } + return hash; +} + +static inline uint64_t hash_fnv1a64(uint8_t *buf, size_t len) +{ + return hash_fnv1a64_update(UINT64_C(0xcbf29ce484222325), buf, len); +} + +/* + * MurmurHash 3 final mix with seed to handle 0. + * + * Width is number of bits of the value to return. + * http://stackoverflow.com/a/12996028 + */ +static inline uint32_t hash_bucket32(uint32_t v, size_t width) +{ + uint32_t x = v + UINT32_C(0x2f693b52); + + x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b); + x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b); + x = ((x >> 16) ^ x); + return x >> (32 - width); +} + +/* + * SplitMix64 - can be used to disperse fnv1a hash, to hash + * an integer, or as a simple non-cryptographic prng. + * + * Width is number of bits of the value to return. + * http://stackoverflow.com/a/12996028 + */ +static inline uint64_t hash_bucket64(uint64_t v, size_t width) +{ + uint64_t x = v + UINT64_C(0x9e3779b97f4a7c15); + + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + x = x ^ (x >> 31); + return x >> (64 - width); +} + +static inline uint64_t hash_random64(uint64_t *state) +{ + uint64_t x; + + x = hash_bucket64(*state, 64); + *state = x; + return x; +} + +/* + * Faster, less random hash bucket compared to hash_bucket32, but works + * for smaller integers. + */ +static inline uint32_t hash_mult32(uint32_t v, size_t width) +{ + /* Knuth's multiplicative hash. */ + return (v * UINT32_C(2654435761)) >> (32 - width); +} + +#endif /* HASH_H */ diff --git a/external/hash/hash_table.h b/external/hash/hash_table.h new file mode 100644 index 0000000..5c3e9cd --- /dev/null +++ b/external/hash/hash_table.h @@ -0,0 +1,266 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HASH_TABLE_H +#define HASH_TABLE_H + +#include "ht_portable.h" +#include <stddef.h> + +/* + * Define HT_PRIVATE to make all name wrapping interface functions static + * inline when including hash implementation directly in user code. This + * can increase performance significantly (3x) on small hash tables with + * fast hash functions because the compiler can better optimize static + * functions. Some compiler optimizations will get the same speed + * with external linkage (clang 4.2 -O4 but not -O3). + * + * Can also be used to simple hide the interface from global + * linkage to avoid name clashes. + */ +#ifndef HT_PRIVATE +#define HT_PRIV +#else +#define HT_PRIV static inline +#endif + +/* + * Generic hash table type. This makes it possible to use hash tables + * in datastructures and header files that do not have access to + * the specific hash table implementation. Call to init is optional + * if the structure is zeroed. + * + * Offsets are only used with Robin Hood hashing to segment each chain. + * + * Keys and values are both stored in the same item pointer. There are + * downsides to this over a key / value represention, but since we also + * use less space we can afford lower the load factor and we can have a + * more complex key representations. The smaller bucket size also helps + * when ordering Robin Hood hash chains. + */ +typedef struct hash_table hash_table_t; +struct hash_table { + void *table; + char *offsets; + size_t count; + /* May be stored as a direct count, or log2. */ + size_t buckets; +}; + +enum hash_table_insert_mode { + ht_replace = 0, + ht_keep = 1, + ht_unique = 2, + ht_multi = 3, +}; + +/* + * This macro defines the prototypes of the hash table that user code + * needs for linkage. + * + * See also "hash_table_def.h" which builds wrapper functions to a + * generic hash table implementation so each specialization gets its own + * set of named functions. + * + * The HT_ITEM is normally a pointer to and the hash table does not + * store any signficant information internally. Customizations map + * the item value to a key. Certain values can be reserved, for + * example 0 indicates missing value, and sometimes 1 is reserved for + * internal tombstones and 2 may be used to return allocation failure. + */ +#define DECLARE_HASH_TABLE(HT_NAME, HT_ITEM) \ + \ +typedef hash_table_t HT_NAME##_t; \ +typedef HT_ITEM HT_NAME##_item_t; \ + \ +/* Prototype for user supplied callback when visiting all elements. */ \ +typedef void HT_NAME##_visitor_f(void *context, HT_ITEM item); \ + \ +extern const HT_NAME##_item_t HT_NAME##_missing; \ +extern const HT_NAME##_item_t HT_NAME##_nomem; \ +extern const HT_NAME##_item_t HT_NAME##_deleted; \ + \ +static inline int HT_NAME##_is_valid(HT_ITEM item) \ +{ \ + return \ + item != HT_NAME##_missing && \ + item != HT_NAME##_nomem && \ + item != HT_NAME##_deleted; \ +} \ + \ +static inline int HT_NAME##_is_missing(HT_ITEM item) \ +{ \ + return item == HT_NAME##_missing; \ +} \ + \ +static inline int HT_NAME##_is_nomem(HT_ITEM item) \ +{ \ + return item == HT_NAME##_nomem; \ +} \ + \ +static inline int HT_NAME##_is_deleted(HT_ITEM item) \ +{ \ + return item == HT_NAME##_deleted; \ +} \ + \ +/* \ + * Allocates enough buckets to represent count elements without resizing. \ + * The actual number of allocated buckets depends on the load factor \ + * given as a macro argument in the implementation. The bucket number \ + * rounds up to the nearest power of 2. \ + * \ + * `ht` should not be initialized beforehand, otherwise use resize. \ + * Alternatively, it is also valid to zero initialize the table by \ + * other means - this will postpone allocation until needed. \ + * \ + * The load factor (template argument) should be positive and at most \ + * 100%, otherwise insertion and resize cannot succeed. The recommended \ + * load factor is between 25% and 75%. \ + * \ + * Returns 0 on success, -1 on allocation failure or invalid load factor. \ + */ \ +HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count); \ + \ +/* \ + * Clears the allocated memory. Optionally takes a destructor \ + * that will visit all items. \ + * The table struct may be reused after being destroyed. \ + * May also be called on a zero initialised hash table. \ + * \ + * Can be called in place of clear for more control. \ + */ \ +HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \ + HT_NAME##_visitor_f *destructor, void *context); \ + \ +/* \ + * Clears the allocated memory, but does manage memory or state of any \ + * stored items. It is a simpler version of destroy. \ + */ \ +HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht); \ + \ +/* \ + * Resizes the hash table to hold at least `count` elements. \ + * The actual number of allocated buckets is a strictly larger power of \ + * two. If `count` is smaller than the current number of elements, \ + * that number is used instead of count. Thus, resize(ht, 0) may be \ + * used to reduce the table size after a spike. \ + * The function is called automatically as elements are inserted, \ + * but shrinking the table should be done manually. \ + * \ + * If resizing to same size, table is still reallocated but will then \ + * clean up old tombstones from excessive deletion. \ + * \ + * Returns 0 on success, -1 on allocation failure. \ + */ \ +HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count); \ + \ +/* \ + * Inserts an item pointer in one of the following modes: \ + * \ + * ht_keep: \ + * If the key exists, the stored item is kept and returned, \ + * otherwise it is inserted and null is returned. \ + * \ + * ht_replace: \ + * If the key exists, the stored item is replaced and the old \ + * item is returned, otherwise the item is inserted and null \ + * is returned. \ + * \ + * ht_unique: \ + * Inserts an item without checking if a key exists. Always return \ + * null. This is faster when it is known that the key does not exists. \ + * \ + * ht_multi: \ + * Same as ht_unique but with the intention that a duplicate key \ + * might exist. This should not be abused because not all hash table \ + * implementions work well with too many collissions. Robin Hood \ + * hashing might reallocate aggressively to keep the chain length \ + * down. Linear and Quadratic probing do handle this, albeit slow. \ + * \ + * The inserted item cannot have the value HT_MISSING and depending on \ + * implementation also not HT_DELETED and HT_NOMEM, but the \ + * definitions are type specific. \ + */ \ +HT_PRIV HT_ITEM HT_NAME##_insert(HT_NAME##_t *ht, \ + const void *key, size_t len, HT_ITEM item, int mode); \ + \ +/* Similar to insert, but derives key from item. */ \ +HT_PRIV HT_ITEM HT_NAME##_insert_item(HT_NAME##_t *ht, \ + HT_ITEM item, int mode); \ + \ +/* \ + * Finds the first matching item if any, or returns null. \ + * If there are duplicate keys, the first inserted is returned. \ + */ \ +HT_PRIV HT_ITEM HT_NAME##_find(HT_NAME##_t *ht, \ + const void *key, size_t len); \ + \ +/* \ + * Removes first inserted item that match the given key, if any. \ + * Returns the removed item if any, otherwise null. \ + */ \ +HT_PRIV HT_ITEM HT_NAME##_remove(HT_NAME##_t *ht, \ + const void *key, size_t len); \ + \ +/* \ + * Finds an item that compares the same as the given item but it is \ + * not necessarily the same item if it either isn't stored, or if \ + * there are duplicates in the table. \ + */ \ +HT_PRIV HT_ITEM HT_NAME##_find_item(HT_NAME##_t *ht, HT_ITEM item); \ + \ +/* \ + * This removes the first item that matches the given item, not \ + * necessarily the item itself, and the item need not be present \ + * in the table. Even if the item is in fact removed, it may still \ + * be present if stored multiple times through abuse use of the \ + * insert_unique function. \ + */ \ +HT_PRIV HT_ITEM HT_NAME##_remove_item(HT_NAME##_t *ht, HT_ITEM item); \ + \ +/* \ + * Calls a function for every item in the hash table. This may be \ + * used for destructing items, provided the table is not accessed \ + * subsequently. In fact, the hash_table_clear function takes an \ + * optional visitor that does exactly that. \ + * \ + * The function is linear of the allocated hash table size, so will be \ + * inefficient if the hash table was resized much larger than the number \ + * of stored items. In that case it is better to store links in the \ + * items. For the default resizing, the function is reasonably fast \ + * because for cache reasons it is very fast to exclude empty elements. \ + */ \ +HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \ + HT_NAME##_visitor_f *visitor, void *context); \ + \ +/* \ + * Returns number of elements in the table. (Not necessarily the number of \ + * unique keys. \ + */ \ +static inline size_t HT_NAME##_count(HT_NAME##_t *ht) \ +{ \ + return ht->count; \ +} \ + +#endif /* HASH_TABLE_H */ diff --git a/external/hash/hash_table_def.h b/external/hash/hash_table_def.h new file mode 100644 index 0000000..5362d47 --- /dev/null +++ b/external/hash/hash_table_def.h @@ -0,0 +1,154 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HASH_TABLE_DEF_H +#define HASH_TABLE_DEF_H + +#include "ht_hash_function.h" +#ifndef HT_HASH_FUNCTION +/* + * If the default hash function is used, make sure to link with the + * appropriate hash implementation file. + */ +#define HT_HASH_FUNCTION ht_default_hash_function +#endif + +#ifndef HT_LOAD_FACTOR +#define HT_LOAD_FACTOR 0.7 +#endif + +#define HT_LOAD_FACTOR_FRAC ((size_t)((float)(HT_LOAD_FACTOR)*256)) + +#ifndef HT_PANIC +#include <stdio.h> +#define HT_PANIC(s) { fprintf(stderr, "aborting on panic: %s\n", s); exit(1); } +#endif + +#ifndef HT_MISSING +#define HT_MISSING ((ht_item_t)0) +#endif + +#ifndef HT_NOMEM +#define HT_NOMEM ((ht_item_t)1) +#endif + +#ifndef HT_DELETED +#define HT_DELETED ((ht_item_t)2) +#endif + +#define DEFINE_HASH_TABLE(HT_NAME) \ + \ +typedef HT_NAME##_item_t ht_item_t; \ +typedef HT_NAME##_visitor_f ht_visitor_f; \ + \ +/* User supplied. */ \ +static inline int ht_match(const void *key, size_t len, ht_item_t item); \ +static inline const void *ht_key(ht_item_t item); \ +static inline size_t ht_key_len(ht_item_t item); \ + \ +/* Implementation supplied. */ \ +static ht_item_t ht_insert(hash_table_t *ht, \ + const void *key, size_t len, ht_item_t new_item, int mode); \ +static ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len); \ +static ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len); \ +static int ht_init(hash_table_t *ht, size_t count); \ +static int ht_resize(hash_table_t *ht, size_t count); \ +static void ht_clear(hash_table_t *ht); \ +static void ht_visit(hash_table_t *ht, \ + ht_visitor_f *visitor, void *context); \ + \ +const ht_item_t HT_NAME##_missing = HT_MISSING; \ +const ht_item_t HT_NAME##_nomem = HT_NOMEM; \ +const ht_item_t HT_NAME##_deleted = HT_DELETED; \ + \ +HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht) \ +{ \ + ht_clear(ht); \ +} \ + \ +HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \ + HT_NAME##_visitor_f *destructor, void *context) \ +{ \ + if (destructor) { \ + ht_visit(ht, destructor, context); \ + } \ + ht_clear(ht); \ +} \ + \ +HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count) \ +{ \ + return ht_init(ht, count); \ +} \ + \ +HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count) \ +{ \ + return ht_resize(ht, count); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_insert(HT_NAME##_t *ht, \ + const void *key, size_t len, ht_item_t new_item, int mode) \ +{ \ + return ht_insert(ht, key, len, new_item, mode); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_insert_item(HT_NAME##_t *ht, \ + ht_item_t item, int mode) \ +{ \ + return ht_insert(ht, \ + ht_key(item), \ + ht_key_len(item), \ + item, mode); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_find(HT_NAME##_t *ht, \ + const void *key, size_t len) \ +{ \ + return ht_find(ht, key, len); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_find_item(HT_NAME##_t *ht, ht_item_t item) \ +{ \ + return ht_find(ht, \ + ht_key(item), \ + ht_key_len(item)); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_remove(HT_NAME##_t *ht, \ + const void *key, size_t len) \ +{ \ + return ht_remove(ht, key, len); \ +} \ + \ +HT_PRIV ht_item_t HT_NAME##_remove_item(HT_NAME##_t *ht, ht_item_t item) \ +{ \ + return ht_remove(ht, ht_key(item), ht_key_len(item)); \ +} \ + \ +HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \ + HT_NAME##_visitor_f *visitor, void *context) \ +{ \ + ht_visit(ht, visitor, context); \ +} \ + +#endif /* HASH_TABLE_DEF_H */ diff --git a/external/hash/hash_table_impl.h b/external/hash/hash_table_impl.h new file mode 100644 index 0000000..94fc9b8 --- /dev/null +++ b/external/hash/hash_table_impl.h @@ -0,0 +1,233 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +/* + * This file implements a generic hash interface such that different + * instances have the same name, but hidden from each other. + * The interface maps the local names to a public specific type. + * + * This implementations implements a hash table with linear or quadratic + * probing. + */ + +#ifdef HASH_TABLE_IMPL +#error "cannot have multiple implementations in same compilation unit" +#endif +#define HASH_TABLE_IMPL +/* Open Addressing */ +#define HT_OA + +#if defined(_MSC_VER) +#pragma warning(disable: 4127) /* conditional expression is constant */ +#endif + +#include <stdlib.h> +#include <assert.h> + +#ifndef HT_PROBE +#ifdef HT_PROBE_QUADRATIC +#define HT_PROBE(k, i, N) ((k + (i + i * i) / 2) & N) +#else +#define HT_PROBE(k, i, N) ((k + i) & N) +#endif +#endif + +static int ht_init(hash_table_t *ht, size_t count) +{ + size_t buckets = 4; + + if ((HT_LOAD_FACTOR_FRAC) > 256 || (HT_LOAD_FACTOR_FRAC) < 1) { + /* + * 100% is bad but still the users choice. + * 101% will never terminate insertion. + */ + HT_PANIC("hash table failed with impossible load factor"); + return -1; + } + while (count > buckets * (HT_LOAD_FACTOR_FRAC) / 256) { + buckets *= 2; + } + ht->table = calloc(buckets, sizeof(ht_item_t)); + if (ht->table == 0) { + return -1; + } + ht->offsets = 0; + ht->buckets = buckets; + ht->count = 0; + return 0; +} + +static int ht_resize(hash_table_t *ht, size_t count) +{ + size_t i; + hash_table_t ht2; + ht_item_t *T = ht->table; + void *item; + + if (count < ht->count) { + count = ht->count; + } + if (ht_init(&ht2, count)) { + return -1; + } + for (i = 0; i < ht->buckets; ++i) { + item = T[i]; + if ((item && item != HT_DELETED)) { + ht_insert(&ht2, ht_key(item), ht_key_len(item), item, ht_multi); + } + } + ht_clear(ht); + memcpy(ht, &ht2, sizeof(*ht)); + return 0; +} + +static ht_item_t ht_insert(hash_table_t *ht, + const void *key, size_t len, ht_item_t new_item, int mode) +{ + ht_item_t *T; + size_t N, i, j, k; + ht_item_t item, *vacant = 0; + + assert(new_item != HT_MISSING); + assert(new_item != HT_DELETED); + assert(new_item != HT_NOMEM); + + if (ht->count >= ht->buckets * (HT_LOAD_FACTOR_FRAC) / 256) { + if (ht_resize(ht, ht->count * 2)) { + HT_PANIC("hash table failed to allocate memory during resize"); + return HT_NOMEM; + } + } + T = ht->table; + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len); + i = 0; + j = HT_PROBE(k, i, N); + if (mode == ht_unique || mode == ht_multi) { + ++ht->count; + while (T[j] && T[j] != HT_DELETED) { + ++i; + j = HT_PROBE(k, i, N); + } + T[j] = new_item; + return 0; + } + while ((item = T[j])) { + if (item == HT_DELETED) { + if (vacant == 0) { + /* + * If a tombstone was found, use the first available, + * but continue search for possible match. + */ + vacant = &T[j]; + } + } else if (ht_match(key, len, item)) { + if (mode == ht_replace) { + T[j] = new_item; + } + return item; + } + ++i; + j = HT_PROBE(k, i, N); + } + if (vacant == 0) { + vacant = &T[j]; + } + ++ht->count; + *vacant = new_item; + return 0; +} + +static ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len) +{ + ht_item_t *T = ht->table; + size_t N, i, j, k; + ht_item_t item; + + if (T == 0) { + return 0; + } + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len); + i = 0; + j = HT_PROBE(k, i, N); + while ((item = T[j])) { + if ((item != HT_DELETED) && + ht_match(key, len, item)) { + return item; + } + ++i; + j = HT_PROBE(k, i, N); + } + return 0; +} + +static ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len) +{ + ht_item_t *T = ht->table; + size_t N, i, j, k; + ht_item_t item; + + if (T == 0) { + return 0; + } + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len); + i = 0; + j = HT_PROBE(k, i, N); + while ((item = T[j])) { + if (item != HT_DELETED && + ht_match(key, len, item)) { + T[j] = HT_DELETED; + --ht->count; + return item; + } + ++i; + j = HT_PROBE(k, i, N); + } + return 0; +} + +static void ht_visit(hash_table_t *ht, ht_visitor_f *visitor, void *context) +{ + size_t i; + ht_item_t *T = ht->table; + ht_item_t item; + + for (i = 0; i < ht->buckets; ++i) { + item = T[i]; + if (item && item != HT_DELETED) { + visitor(context, item); + } + } +} + +static void ht_clear(hash_table_t *ht) +{ + if (ht->table) { + free(ht->table); + } + memset(ht, 0, sizeof(*ht)); +} diff --git a/external/hash/hash_table_impl_rh.h b/external/hash/hash_table_impl_rh.h new file mode 100644 index 0000000..b4cabae --- /dev/null +++ b/external/hash/hash_table_impl_rh.h @@ -0,0 +1,360 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* We use the same define for all implementations */ +#ifdef HASH_TABLE_IMPL +#error "cannot have multiple implementations in same compilation unit" +#endif +#define HASH_TABLE_IMPL +/* Robin Hood (with offset table) */ +#define HT_RH + +#if defined(_MSC_VER) +#pragma warning(disable: 4127) /* conditional expression is constant */ +#endif + +#include <stdlib.h> +#include <assert.h> + +/* + * A variation of Robin Hashing: + * We do not calcute distance from buckets, nor do we cache + * hash keys. Instead we maintain a 7-bit offset that points + * to where the first entry of a bucket is stored. In Robin Hood hashing + * all entries conceptually chained to the same bucket are stored + * immediately after each other in order of insertion. The offset of + * the next bucket is naturally the end of the previous bucket, off by + * one. This breaks down when the bucket offset is 0 and the bucket is + * empty because it suggests there is an element. We cannot distinguish + * between a single used and unused entry, except by looking at the + * content or otherwise tag the information on. This is not a problem, + * just a special case to deal with. + * + * The offsets are stored separately which might lead to more cache line + * traffic, but the alternative is not very elegant - either wasting + * space or trying to pack offsets on a per cache line basis. We only + * need 8 bits for offsets. If the offset overflows, bit 7 will be set + * which we can easily detect. During insertion, offsets are increated + * on all affected buckets, and likewise decrement on remove. In + * principle we can use bit parallel increments to update most offsets + * in a single operation, but it is hardly worthwhile due to setup + * cost. The approach bears some resemblance to hopscotch hashing which + * uses local offsets for chaining, but we prefer the simpler Robin + * Hood approach. + * + * If the offset overflows, the table is resized. We expect the packed + * chains to behave like a special case of a hopscotch layout and + * consequently have the same bounds, meaning we are unlikely to have + * neither long offsets nor long chains if we resize below very full + * so resizing on an offset of 128 should be ok. + * + * Our main motivation for this hashing is actually to get rid of + * tombstones in quadruatic and linear probing. Avoiding tombstones + * is much simpler when sorting chains Robin Hood style, and we avoid + * checking for tombstones. We loose this benefit by having to inspect + * offsets, but then also avoid checking keys before the chain, and + * after because we can zero in on exactly the entries belonging to + * bucket. + * + * Unlike traditional Robin Hood, we can find a missing key very quickly + * without any heuristics: we only need to inspect exactly the number + * of entries in the bucket (or at most 1 if the bucket is empty). + * + * Find operations start exactly at an entry with a matching hash key + * unlike normal Robin Hood which must scan past a few earlier entries + * on average, or guestimate where to start and seek both ways. + * + * We can also very quickly insert a key that is known to be unique + * because we can add it directly to the end (but possibly requiring + * a shift of later entries Robin Hood style). + * + * Whether these benefits outweighs the cost of a separate offset + * lookup is unclear, but the reduced memory consumption certainly + * allows for a lower load factor, which also helps a lot. + * + * Traditional Robin Hood Hashing actually permits a chain to become + * very long. We do not permit this, in line with hopscotch hashing. + * This is a drawback from a security perspective because worst case + * this can trigger resizing ad infinitum iff the hash function can + * be hacked or massive duplicate key insertion can be triggered. By + * used the provided hash functions and seeding them randomly at + * startup, and avoiding the multi key feature, it is very unlikely to + * be a problem with what is known about hash table attacks so far. + * + * Values and keys are not stored, only item pointers. Custom macroes + * or inline functions provide access to key data from the item. We + * could add a separate value array and treat the item strictly as a + * key, but we can have a smaller load factor instead, and can more + * easily avoid copying complex key structures, such as start end + * pointers to token data for parser. + * + * A typical hash table has: key pointer or key value, value pointer + * or value, a cached hash key or bitmap (for Robin Hood or Hopscotch) + * which on 64 bit platforms easily amounts to 20 bytes or more per + * bucket. We use 9 bytes on 64 bit platforms and 5 bytes on 32 bit. + * This gets us down to a max load of 0.5 and on average about 0.37. + * This should make it very likely that the first bucket inspected is + * a direct hit negating the benefit of caching hash keys. In addition, + * when it is not a direct hit, we get pointers loaded in a cache line + * to inspect, all known to have the same hash key. + */ + +int ht_init(hash_table_t *ht, size_t count) +{ + size_t buckets = 4; + + if ((HT_LOAD_FACTOR_FRAC) > 256 || (HT_LOAD_FACTOR_FRAC) < 1) { + /* + * 101% will never terminate insertion. + * 0% will never terminate resize. + */ + HT_PANIC("robin hood hash table failed with impossible load factor"); + return -1; + } + while (count > buckets * (HT_LOAD_FACTOR_FRAC) / 256) { + buckets *= 2; + } + ht->table = calloc(buckets, sizeof(ht_item_t)); + if (ht->table == 0) { + return -1; + } + ht->offsets = calloc(buckets, sizeof(char)); + if (ht->offsets == 0) { + free(ht->table); + ht->table = 0; + return -1; + } + ht->buckets = buckets; + ht->count = 0; + return 0; +} + +int ht_resize(hash_table_t *ht, size_t count) +{ + size_t i; + hash_table_t ht2; + ht_item_t *T = ht->table; + ht_item_t item; + + if (count < ht->count) { + count = ht->count; + } + if (ht_init(&ht2, count)) { + return -1; + } + for (i = 0; i < ht->buckets; ++i) { + item = T[i]; + if (item > (ht_item_t)1) { + ht_insert(&ht2, ht_key(item), ht_key_len(item), item, ht_multi); + } + } + ht_clear(ht); + memcpy(ht, &ht2, sizeof(*ht)); + return 0; +} + +ht_item_t ht_insert(hash_table_t *ht, + const void *key, size_t len, ht_item_t item, int mode) +{ + ht_item_t *T; + size_t N, n, j, k, offset; + ht_item_t new_item; + char overflow = 0; + + new_item = item; + if (ht->count >= ht->buckets * (HT_LOAD_FACTOR_FRAC) / 256) { + if (ht_resize(ht, ht->count * 2)) { + HT_PANIC("robin hood hash table failed to allocate memory during resize"); + return HT_NOMEM; + } + } + T = ht->table; + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len) & N; + offset = ht->offsets[k]; + j = (k + offset) & N; + /* + * T[j] == 0 is a special case because we cannot count + * zero probe length, and because we should not increment + * the offset at insertion point in this case. + * + * T[j] == 0 implies offset == 0, but this way we avoid + * hitting memory that we don't need. + */ + if (offset == 0 && T[j] == 0) { + ++ht->count; + T[j] = new_item; + return 0; + } + n = ht->offsets[(k + 1) & N] - offset + 1; + if (mode == ht_multi) { + /* Don't search for match before inserting. */ + j = (j + n) & N; + n = 0; + } + while (n--) { + item = T[j]; + if (ht_match(key, len, item)) { + if (mode == ht_replace) { + T[j] = new_item; + } + return item; + } + j = (j + 1) & N; + } + ++ht->count; + while (k != j) { + /* Only increment buckets after own bucket. */ + k = (k + 1) & N; + overflow |= ++ht->offsets[k]; + } + while ((item = T[j])) { + T[j] = new_item; + new_item = item; + j = (j + 1) & N; + overflow |= ++ht->offsets[j]; + } + T[j] = new_item; + + if (overflow < 0) { + /* + * At least one offset overflowed, so we need to + * resize the table. + */ + if (ht->count * 10 < ht->buckets) { + HT_PANIC("FATAL: hash table resize on low utilization would explode\n"\ + " possible collision DoS or bad hash function"); + return HT_NOMEM; + } + if (ht_resize(ht, ht->count * 2)) { + HT_PANIC("FATAL: hash table resize failed and left hash table inconsistent");\ + /* + * This renders the hash table in a bad state + * because we have updated to an inconsistent + * state. + */ + return HT_NOMEM; + } + } + return item; +} + +ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len) +{ + ht_item_t *T = ht->table; + size_t N, n, j, k, offset; + ht_item_t item; + + if (T == 0) { + return 0; + } + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len) & N; + offset = ht->offsets[k]; + j = (k + offset) & N; + if (offset == 0 && T[j] == 0) { + /* Special case because we cannot count zero probe length. */ + return 0; + } + n = ht->offsets[(k + 1) & N] - offset + 1; + while (n--) { + item = T[j]; + if (ht_match(key, len, item)) { + return item; + } + j = (j + 1) & N; + } + return 0; +} + +ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len) +{ + ht_item_t *T = ht->table; + size_t N, n, j, k, offset; + ht_item_t item, *next_item; + + if (T == 0) { + return 0; + } + N = ht->buckets - 1; + k = HT_HASH_FUNCTION(key, len) & N; + offset = ht->offsets[k]; + j = (k + offset) & N; + if (offset == 0 && T[j] == 0) { + return 0; + } + n = ht->offsets[(k + 1) & N] - offset + 1; + while (n) { + item = T[j]; + if (ht_match(key, len, item)) { + break; + } + j = (j + 1) & N; + --n; + } + if (n == 0) { + return 0; + } + --ht->count; + while (k != j) { + /* Do not update the offset of the bucket that we own. */ + k = (k + 1) & N; + --ht->offsets[k]; + } + for (;;) { + j = (j + 1) & N; + if (ht->offsets[j] == 0) { + T[k] = 0; + return item; + } + --ht->offsets[j]; + T[k] = T[j]; + k = j; + } +} + +void ht_visit(hash_table_t *ht, ht_visitor_f *visitor, void *context) +{ + size_t i; + ht_item_t *T = ht->table; + ht_item_t item; + + for (i = 0; i < ht->buckets; ++i) { + item = T[i]; + if (item > (ht_item_t)1) { + visitor(context, item); + } + } +} + +void ht_clear(hash_table_t *ht) +{ + if (ht->table) { + free(ht->table); + } + if (ht->offsets) { + free(ht->offsets); + } + memset(ht, 0, sizeof(*ht)); +} diff --git a/external/hash/hash_test.c b/external/hash/hash_test.c new file mode 100644 index 0000000..d54cc07 --- /dev/null +++ b/external/hash/hash_test.c @@ -0,0 +1,419 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +/* Not used here, just included to catch compiler errors and warnings. */ +#include "hash.h" + +#include "str_set.h" +#include "token_map.h" +#include "ht64.h" +#include "ht32.h" +#include "ht64rh.h" +#include "ht32rh.h" + +#include "ht_trace.h" + +#define test_assert(x) if (!(x)) { printf("Test failed at %s:%d\n", __FILE__, __LINE__); assert(0); exit(1); } + + +str_set_t S; +token_map_t TM; + +char *keys[] = { + "foo", + "bar", + "baz", + "gimli", + "bofur" +}; + +struct token tokens[5]; + +void free_key(void *context, char *key) { + free(key); +} + +void test_str_set() +{ + int i; + char *s, *s0, *s1; + unsigned int n = sizeof(keys)/sizeof(keys[0]); + + /* We rely on zero initialization here. */ + test_assert(str_set_count(&S) == 0); + for (i = 0; i < n; ++i) { + s = keys[i]; + /* We don't have to use strdup, but we test the + * allocation management and item replacement. */ + s = str_set_insert(&S, s, strlen(s), strdup(s), ht_keep); + test_assert(str_set_count(&S) == i + 1); + test_assert(s == 0); + } + test_assert(n == 5); + for (i = 0; i < n; ++i) { + s = keys[i]; + s = str_set_find(&S, s, strlen(s)); + test_assert(strcmp(s, keys[i]) == 0); + } + s = str_set_remove(&S, "gimlibofur", 5); + test_assert(strcmp(s, "gimli") == 0); + free(s); + test_assert(str_set_count(&S) == n - 1); + s = str_set_remove(&S, "gimlibofur", 5); + test_assert(s == 0); + test_assert(str_set_count(&S) == n - 1); + s = str_set_insert(&S, "foobarbaz", 6, + (s0 = strndup("foobarbaz", 6)), ht_keep); + test_assert(s == 0); + test_assert(str_set_count(&S) == n); + s = str_set_insert(&S, "foobarbaz", 6, + (s1 = strndup("foobarbaz", 6)), ht_keep); + test_assert(s == s0); + free(s1); + test_assert(str_set_count(&S) == n); + s = str_set_find(&S, "foobar", 6); + test_assert(s == s0); + s = str_set_insert(&S, "foobarbaz", 6, + (s1 = strndup("foobarbaz", 6)), ht_replace); + test_assert(s == s0); + free(s); + s = str_set_find(&S, "foobar", 6); + test_assert(s == s1); + s = str_set_find(&S, "foobarbaz", 9); + test_assert(s == 0); + str_set_destroy(&S, free_key, 0); + s = str_set_find(&S, "foobar", 6); + test_assert(s == 0); + for (i = 0; i < n; ++i) { + s = keys[i]; + s = str_set_find(&S, s, strlen(s)); + test_assert(s == 0); + } +} + +void test_str_set2() +{ + int i; + char *s, *s1; + unsigned int n = sizeof(keys)/sizeof(keys[0]); + + for (i = 0; i < n; ++i) { + s = keys[i]; + str_set_insert(&S, s, strlen(s), s, ht_unique); + } + test_assert(str_set_count(&S) == n); + for (i = 0; i < n; ++i) { + s = keys[i]; + /* + * Unique and multi are the same logically, but different + * intentionally. + */ + str_set_insert(&S, s, strlen(s), s, ht_multi); + } + test_assert(str_set_count(&S) == 2 * n); + ht_trace_buckets(&S, "after double insert", 0, 8); + for (i = 0; i < n; ++i) { + s = keys[i]; + s1 = str_set_find(&S, s, strlen(s)); + test_assert(strcmp(s, s1) == 0); + } + for (i = 0; i < n; ++i) { + s = keys[i]; + s1 = str_set_remove(&S, s, strlen(s)); + test_assert(strcmp(s, s1) == 0); + test_assert(str_set_count(&S) == 2 * n - i - 1); + ht_trace_buckets(&S, "after single", 8, 8); + } + ht_trace_buckets(&S, "after first remove", 0, 8); + for (i = 0; i < n; ++i) { + s = keys[i]; + s1 = str_set_remove(&S, s, strlen(s)); + test_assert(strcmp(s, s1) == 0); + test_assert(str_set_count(&S) == n - i - 1); + } + ht_trace_buckets(&S, "efter second remove", 0, 8); + for (i = 0; i < n; ++i) { + s = keys[i]; + s1 = str_set_remove(&S, s, strlen(s)); + test_assert(s1 == 0); + test_assert(str_set_count(&S) == 0); + } + str_set_clear(&S); +} + +void test_str_set3() +{ + int i; + char *s, *s1; + unsigned int n = sizeof(keys)/sizeof(keys[0]); + + for (i = 0; i < n; ++i) { + s = keys[i]; + str_set_insert_item(&S, s, ht_unique); + } + test_assert(str_set_count(&S) == n); + for (i = 0; i < n; ++i) { + s = keys[i]; + str_set_insert_item(&S, s, ht_keep); + } + test_assert(str_set_count(&S) == n); + for (i = 0; i < n; ++i) { + s = keys[i]; + s1 = str_set_find_item(&S, s); + test_assert(strcmp(s, s1) == 0); + } + s = keys[1]; + s1 = str_set_remove_item(&S, s); + /* + * This doesn't always hold, but here we + * are sure because of how we inserted data. + */ + test_assert(s == s1); + s1 = str_set_find_item(&S, s); + test_assert(s1 == 0); + str_set_clear(&S); +} + +void test_str_set4() +{ + char *s, *s1; + + s = "dumble"; + str_set_insert_item(&S, "dumble", ht_keep); + s1 = str_set_find_item(&S, s); + /* TMnsert without replace. */ + str_set_insert_item(&S, "2dumble" + 1, ht_keep); + test_assert(s == s1); + s1 = str_set_find_item(&S, s); + test_assert(s == s1); + /* TMnsert with replace. */ + s1 = str_set_insert_item(&S, "2dumble" + 1, ht_replace); + /* Old value still returned. */ + test_assert(s == s1); + s1 = str_set_find_item(&S, s); + test_assert(s != s1); + /* New item returned. */ + test_assert(strcmp(s1 - 1, "2dumble") == 0); + str_set_clear(&S); +} + +void visit_item_set(void *context, token_map_item_t item) +{ + int *count = context; + ++*count; +} + +void test_token_map() +{ + int i, count; + token_map_item_t item; + unsigned int n = sizeof(keys)/sizeof(keys[0]); + + test_assert(sizeof(tokens)/sizeof(item[0]) == n); + + for (i = 0; i < n; ++i) { + tokens[i].token = keys[i]; + tokens[i].len = strlen(keys[i]); + } + for (i = 0; i < n; ++i) { + item = &tokens[i]; + token_map_insert(&TM, item->token, item->len, item, ht_unique); + } + count = 0; + token_map_visit(&TM, visit_item_set, &count); + test_assert(count == n); + + for (i = 0; i < n; ++i) { + item = token_map_find(&TM, keys[i], strlen(keys[i])); + test_assert(item->type == 0); + item->type = 1; + } + for (i = 0; i < n; ++i) { + item = token_map_find_item(&TM, &tokens[i]); + test_assert(item->type == 1); + item->type = 2; + } +} + +void test_ht32() +{ + uint32_t keys[100]; + int i, j; + ht32_t ht; + uint32_t *x, *y; + + ht32_init(&ht, 10); + for (i = 0; i < 100; ++i) { + keys[i] = i + 3398; + } + for (i = 0; i < 100; ++i) { + x = ht32_insert_item(&ht, &keys[i], ht_unique); + } + for (i = 0; i < 100; ++i) { + x = ht32_find_item(&ht, &keys[i]); + test_assert(x != 0); + test_assert(*x == i + 3398); + } + for (i = 0; i < 100; ++i) { + y = ht32_remove_item(&ht, &keys[i]); + test_assert(y != ht32_missing); + for (j = 0; j < 100; ++j) { + x = ht32_find_item(&ht, &keys[j]); + if (j > i) { + test_assert(x != ht32_missing); + test_assert(*x == j + 3398); + } else { + test_assert(x == ht32_missing); + } + } + } + ht32_clear(&ht); +} + +void test_ht64() +{ + uint64_t keys[100]; + int i, j; + ht64_t ht; + uint64_t *x, *y; + + ht64_init(&ht, 10); + for (i = 0; i < 100; ++i) { + keys[i] = i + 3398; + } + for (i = 0; i < 100; ++i) { + x = ht64_insert_item(&ht, &keys[i], ht_unique); + } + for (i = 0; i < 100; ++i) { + x = ht64_find_item(&ht, &keys[i]); + test_assert(x != 0); + test_assert(*x == i + 3398); + } + for (i = 0; i < 100; ++i) { + y = ht64_remove_item(&ht, &keys[i]); + test_assert(y != ht64_missing); + for (j = 0; j < 100; ++j) { + x = ht64_find_item(&ht, &keys[j]); + if (j > i) { + test_assert(x != ht64_missing); + test_assert(*x == j + 3398); + } else { + test_assert(x == ht64_missing); + } + } + } + ht64_clear(&ht); +} + +void test_ht32rh() +{ + uint32_t keys[100]; + int i, j; + ht32rh_t ht; + uint32_t *x, *y; + + ht32rh_init(&ht, 10); + for (i = 0; i < 100; ++i) { + keys[i] = i + 3398; + } + for (i = 0; i < 100; ++i) { + x = ht32rh_insert_item(&ht, &keys[i], ht_unique); + } + for (i = 0; i < 100; ++i) { + x = ht32rh_find_item(&ht, &keys[i]); + test_assert(x != 0); + test_assert(*x == i + 3398); + } + for (i = 0; i < 100; ++i) { + y = ht32rh_remove_item(&ht, &keys[i]); + test_assert(y != ht32rh_missing); + for (j = 0; j < 100; ++j) { + x = ht32rh_find_item(&ht, &keys[j]); + if (j > i) { + test_assert(x != ht32rh_missing); + test_assert(*x == j + 3398); + } else { + test_assert(x == ht32rh_missing); + } + } + } + ht32rh_clear(&ht); +} + +void test_ht64rh() +{ + uint64_t keys[100]; + int i, j; + ht64rh_t ht; + uint64_t *x, *y; + + ht64rh_init(&ht, 10); + for (i = 0; i < 100; ++i) { + keys[i] = i + 3398; + } + for (i = 0; i < 100; ++i) { + x = ht64rh_insert_item(&ht, &keys[i], ht_unique); + } + for (i = 0; i < 100; ++i) { + x = ht64rh_find_item(&ht, &keys[i]); + test_assert(x != 0); + test_assert(*x == i + 3398); + } + for (i = 0; i < 100; ++i) { + y = ht64rh_remove_item(&ht, &keys[i]); + test_assert(y != ht64rh_missing); + for (j = 0; j < 100; ++j) { + x = ht64rh_find_item(&ht, &keys[j]); + if (j > i) { + test_assert(x != ht64rh_missing); + test_assert(*x == j + 3398); + } else { + test_assert(x == ht64rh_missing); + } + } + } + ht64rh_clear(&ht); +} + +int main(int argc, char *argv[]) +{ + test_str_set(); + test_str_set2(); + test_str_set3(); + test_str_set4(); + test_token_map(); + test_ht32(); + test_ht64(); + test_ht32rh(); + test_ht64rh(); + + printf("all tests passed\n"); + + return 0; +} diff --git a/external/hash/ht32.c b/external/hash/ht32.c new file mode 100644 index 0000000..9954bde --- /dev/null +++ b/external/hash/ht32.c @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ht32.h" +#define HT_HASH_FUNCTION ht_uint32_hash_function + +#include "hash_table_def.h" +DEFINE_HASH_TABLE(ht32) + +#include "hash_table_impl.h" + + +static inline int ht_match(const void *key, size_t len, const ht32_item_t item) +{ + return *(const ht32_item_t)key == *item; +} + +static inline const void *ht_key(const ht32_item_t item) +{ + return (const void *)item; +} + +static inline size_t ht_key_len(const ht32_item_t item) +{ + return sizeof(*item); +} diff --git a/external/hash/ht32.h b/external/hash/ht32.h new file mode 100644 index 0000000..dab9ffb --- /dev/null +++ b/external/hash/ht32.h @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HT32_H +#define HT32_H + +#ifndef UINT8_MAX +#include <stdint.h> +#endif + +#include "hash_table.h" + +DECLARE_HASH_TABLE(ht32, uint32_t *) + +#endif /* HT32_H */ diff --git a/external/hash/ht32rh.c b/external/hash/ht32rh.c new file mode 100644 index 0000000..de6dae2 --- /dev/null +++ b/external/hash/ht32rh.c @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ht32rh.h" +#define HT_HASH_FUNCTION ht_uint32_hash_function + +#include "hash_table_def.h" +DEFINE_HASH_TABLE(ht32rh) + +#include "hash_table_impl_rh.h" + + +static inline int ht_match(const void *key, size_t len, const ht32rh_item_t item) +{ + return *(const ht32rh_item_t)key == *item; +} + +static inline const void *ht_key(const ht32rh_item_t item) +{ + return (const void *)item; +} + +static inline size_t ht_key_len(const ht32rh_item_t item) +{ + return sizeof(*item); +} diff --git a/external/hash/ht32rh.h b/external/hash/ht32rh.h new file mode 100644 index 0000000..061328e --- /dev/null +++ b/external/hash/ht32rh.h @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HT32RH_H +#define HT32RH_H + +#ifndef UINT8_MAX +#include <stdint.h> +#endif + +#include "hash_table.h" + +DECLARE_HASH_TABLE(ht32rh, uint32_t *) + +#endif /* HT32RH_H */ diff --git a/external/hash/ht64.c b/external/hash/ht64.c new file mode 100644 index 0000000..eaebbc5 --- /dev/null +++ b/external/hash/ht64.c @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ht64.h" +#define HT_HASH_FUNCTION ht_uint64_hash_function + +#include "hash_table_def.h" +DEFINE_HASH_TABLE(ht64) + +#include "hash_table_impl.h" + + +static inline int ht_match(const void *key, size_t len, const ht64_item_t item) +{ + return *(const ht64_item_t)key == *item; +} + +static inline const void *ht_key(const ht64_item_t item) +{ + return (const void *)item; +} + +static inline size_t ht_key_len(const ht64_item_t item) +{ + return sizeof(*item); +} diff --git a/external/hash/ht64.h b/external/hash/ht64.h new file mode 100644 index 0000000..b9f9fbe --- /dev/null +++ b/external/hash/ht64.h @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HT64_H +#define HT64_H + +#ifndef UINT8_MAX +#include <stdint.h> +#endif + +#include "hash_table.h" + +DECLARE_HASH_TABLE(ht64, uint64_t *) + +#endif /* HT64_H */ diff --git a/external/hash/ht64rh.c b/external/hash/ht64rh.c new file mode 100644 index 0000000..bfde550 --- /dev/null +++ b/external/hash/ht64rh.c @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ht64rh.h" +#define HT_HASH_FUNCTION ht_uint64_hash_function + +#include "hash_table_def.h" +DEFINE_HASH_TABLE(ht64rh) + +#include "hash_table_impl_rh.h" + + +static inline int ht_match(const void *key, size_t len, const ht64rh_item_t item) +{ + return *(const ht64rh_item_t)key == *item; +} + +static inline const void *ht_key(const ht64rh_item_t item) +{ + return (const void *)item; +} + +static inline size_t ht_key_len(const ht64rh_item_t item) +{ + return sizeof(*item); +} diff --git a/external/hash/ht64rh.h b/external/hash/ht64rh.h new file mode 100644 index 0000000..5b3d454 --- /dev/null +++ b/external/hash/ht64rh.h @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef HT64RH_H +#define HT64RH_H + +#ifndef UINT8_MAX +#include <stdint.h> +#endif + +#include "hash_table.h" + +DECLARE_HASH_TABLE(ht64rh, uint64_t *) + +#endif /* HT64RH_H */ diff --git a/external/hash/ht_hash_function.h b/external/hash/ht_hash_function.h new file mode 100644 index 0000000..1f65ee5 --- /dev/null +++ b/external/hash/ht_hash_function.h @@ -0,0 +1,258 @@ +#ifndef HT_HASH_FUNCTION_H +#define HT_HASH_FUNCTION_H + +#include <stddef.h> + +#ifdef _MSC_VER +/* `inline` only advisory anyway. */ +#pragma warning(disable: 4710) /* function not inlined */ +#endif + +/* Avoid 0 special case in hash functions and allow for configuration with unguessable seed. */ +#ifndef HT_HASH_SEED +#define HT_HASH_SEED UINT32_C(0x2f693b52) +#endif + +#ifndef HT_HASH_32 + +#include "cmetrohash.h" + +static inline size_t ht_default_hash_function(const void *key, size_t len) +{ + uint64_t out; + + cmetrohash64_1((const uint8_t *)key, len, HT_HASH_SEED, (uint8_t *)&out); + return (unsigned int)out; +} + +/* When using the pointer directly as a hash key. */ +static inline size_t ht_ptr_hash_function(const void *key, size_t len) +{ + /* MurmurHash3 64-bit finalizer */ + uint64_t x; + + (void)len; + + x = ((uint64_t)(size_t)key) ^ (HT_HASH_SEED); + + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return (size_t)x; +} + +#else + +#include "PMurHash.h" + +static inline size_t ht_default_hash_function(const void *key, size_t len) +{ + return (size_t)PMurHash32((HT_HASH_SEED), key, (int)len); +} + +/* When using the pointer directly as a hash key. */ +static inline size_t ht_ptr_hash_function(const void *key, size_t len) +{ + /* http://stackoverflow.com/a/12996028 */ + size_t x; + + x = (size_t)key ^ (HT_HASH_SEED); + + x = ((x >> 16) ^ x) * 0x45d9f3bUL; + x = ((x >> 16) ^ x) * 0x45d9f3bUL; + x = ((x >> 16) ^ x); + return x; +} + +#endif /* HT_HASH_32 */ + + +/* This assumes the key points to a 32-bit aligned random value that is its own hash function. */ +static inline size_t ht_uint32_identity_hash_function(const void *key, size_t len) +{ + (void)len; + return (size_t)*(uint32_t *)key; +} + +/* This assumes the key points to a 64-bit aligned random value that is its own hash function. */ +static inline size_t ht_uint64_identity_hash_function(const void *key, size_t len) +{ + (void)len; + return (size_t)*(uint64_t *)key; +} + +/* This assumes the key points to a 32-bit aligned value. */ +static inline size_t ht_uint32_hash_function(const void *key, size_t len) +{ + uint32_t x = *(uint32_t *)key + (uint32_t)(HT_HASH_SEED); + + (void)len; + + /* http://stackoverflow.com/a/12996028 */ + x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b); + x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b); + x = ((x >> 16) ^ x); + return x; +} + +/* This assumes the key points to a 64-bit aligned value. */ +static inline size_t ht_uint64_hash_function(const void *key, size_t len) +{ + uint64_t x = *(uint64_t *)key + UINT64_C(0x9e3779b97f4a7c15) + (uint64_t)(HT_HASH_SEED); + + (void)len; + + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + return (size_t)(x ^ (x >> 31)); +} + +/* + * Suited for set operations of low-valued integers where the stored + * hash pointer is the key and the value. + * + * This function is especially useful for small hash tables (<1000) + * where collisions are cheap due to caching but also works for integer + * sets up to at least 1,000,000. + * + * NOTE: The multiplicative hash function by Knuth requires the modulo + * to table size be done by shifting the upper bits down, since this is + * where the quality bits reside. This yields significantly fewer + * collisions which is important for e.g. chained hashing. However, our + * interface does not provide the required information. + * + * When used in open hashing with load factors below 0.7 where the + * stored pointer is also the key, collision checking is very cheap and + * this pays off in a large range of table sizes where a more + * complicated hash simply doesn't pay off. + * + * When used with a pointer set where the pointer is also the key, it is + * not likely to work as well because the pointer acts as a large + * integer which works against the design of the hash function. Here a + * better mix function is probably worthwhile - therefore we also have + * ht_ptr_hash_function. + */ +static inline size_t ht_int_hash_function(const void *key, size_t len) +{ + (void)len; + return ((size_t)key ^ (HT_HASH_SEED)) * 2654435761UL; +} + +/* Bernsteins hash function, assumes string is zero terminated, len is ignored. */ +static inline size_t ht_str_hash_function(const void *key, size_t len) +{ + const unsigned char *str = key; + size_t hash = 5381 ^ (HT_HASH_SEED); + size_t c; + + (void)len; + + while ((c = (size_t)*str++)) + hash = ((hash << 5) + hash) ^ c; /* (hash * 33) xor c */ + + return hash; +} + +/* Hashes at most len characters or until zero termination. */ +static inline size_t ht_strn_hash_function(const void *key, size_t len) +{ + const unsigned char *str = key; + size_t hash = 5381 ^ (HT_HASH_SEED); + size_t c; + + while (--len && (c = (size_t)*str++)) + hash = ((hash << 5) + hash) ^ c; /* (hash * 33) xor c */ + + return hash; +} + +static inline uint32_t ht_fnv1a32_hash_function(const void *key, size_t len) +{ +#ifndef FNV1A_NOMUL + const uint32_t prime = UINT32_C(0x1000193); +#endif + uint32_t hash = UINT32_C(0x811c9dc5); + const uint8_t *p = key; + + while (len--) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 7) + + (hash << 8) + (hash << 24); +#endif + } + return hash; +} + +static inline uint64_t ht_fnv1a64_hash_function(const void *key, size_t len) +{ +#ifndef FNV1A_NOMUL + const uint64_t prime = UINT64_C(0x100000001b3); +#endif + uint64_t hash = UINT64_C(0xcbf29ce484222325); + const uint8_t *p = key; + + while (len--) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 5) + + (hash << 7) + (hash << 8) + (hash << 40); +#endif + } + return hash; +} + +/* Hashes until string termination and ignores length argument. */ +static inline uint32_t ht_fnv1a32_str_hash_function(const void *key, size_t len) +{ +#ifndef FNV1A_NOMUL + const uint32_t prime = UINT32_C(0x1000193); +#endif + uint32_t hash = UINT32_C(0x811c9dc5); + const uint8_t *p = key; + + (void)len; + + while (*p) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 7) + + (hash << 8) + (hash << 24); +#endif + } + return hash; +} + +/* Hashes until string termination and ignores length argument. */ +static inline uint64_t ht_fnv1a64_str_hash_function(const void *key, size_t len) +{ +#ifndef FNV1A_NOMUL + const uint64_t prime = UINT64_C(0x100000001b3); +#endif + uint64_t hash = UINT64_C(0xcbf29ce484222325); + const uint8_t *p = key; + + (void)len; + + while (*p) { + hash ^= (uint64_t)*p++; +#ifndef FNV1A_NOMUL + hash *= prime; +#else + hash += (hash << 1) + (hash << 4) + (hash << 5) + + (hash << 7) + (hash << 8) + (hash << 40); +#endif + } + return hash; +} + + +#endif /* HT_HASH_FUNCTION_H */ diff --git a/external/hash/ht_portable.h b/external/hash/ht_portable.h new file mode 100644 index 0000000..3affc1d --- /dev/null +++ b/external/hash/ht_portable.h @@ -0,0 +1,9 @@ +#ifndef HT_PORTABLE_H +#define HT_PORTABLE_H + +#if defined(_MSC_VER) && !defined(inline) +#define inline __inline +#endif +#include "pstdint.h" + +#endif diff --git a/external/hash/ht_trace.h b/external/hash/ht_trace.h new file mode 100644 index 0000000..63af4a8 --- /dev/null +++ b/external/hash/ht_trace.h @@ -0,0 +1,59 @@ +#ifndef HT_TRACE_H +#define HT_TRACE_H + +#ifdef HT_TRACE_ON +#ifndef HT_TRACE_OUT +#define HT_TRACE_OUT stderr +#endif + +#include <stdio.h> +#define ht_trace(s) fprintf(HT_TRACE_OUT, "trace: %s\n", s) +#define ht_tracei(s, i) fprintf(HT_TRACE_OUT, "trace: %s: %d\n", s, (int)i) +#define ht_tracex(s, x) fprintf(HT_TRACE_OUT, "trace: %s: 0x%lx\n", s, (long)x) +#define ht_traces(s, s2, len) fprintf(HT_TRACE_OUT, "trace: %s: %.*s\n", s, (int)len, s2) + +static void ht_trace_buckets(hash_table_t *ht, char *msg, int first, int count) +{ + int i, j, N, n; + + n = ht->buckets; + N = n - 1; + + if (count == 0) { + count = 32; + } + if (count > n) { + count = n; + } + + first = first & N; + fprintf(HT_TRACE_OUT, "bucket trace: %s\n", msg); + if (n > count) { + n = count; + } + fprintf(HT_TRACE_OUT, "item count: %ld, bucket count %ld, utilization: %0.1f%%\n", + ht->count, ht->buckets, (double)ht->count / ht->buckets * 100); + + if (ht->offsets) { + for (i = 0; i < n; ++i) { + j = (first + i) & N; + fprintf(HT_TRACE_OUT, "%03d:%08x:[%02d]\n", + j, (unsigned int)((void **)ht->table)[j], (unsigned int)ht->offsets[j]); + } + } else { + for (i = 0; i < n; ++i) { + j = (first + i) & N; + fprintf(HT_TRACE_OUT, "%03d:%08x\n", j, (unsigned int)((void **)ht->table)[j]); + } + } + fprintf(HT_TRACE_OUT, "--\n"); +} +#else +#define ht_trace(arg1) ((void)0) +#define ht_tracei(arg1, arg2) ((void)0) +#define ht_tracex(arg1, arg2) ((void)0) +#define ht_traces(arg1, arg2, arg3) ((void)0) +#define ht_trace_buckets(arg1, arg2, arg3, arg4) ((void)0) +#endif + +#endif /* HT_TRACE_H */ diff --git a/external/hash/initbuild.sh b/external/hash/initbuild.sh new file mode 100755 index 0000000..34a3fc0 --- /dev/null +++ b/external/hash/initbuild.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +cd `dirname $0` +mkdir -p "build/release" +cd build/release && cmake -GNinja ../.. -DCMAKE_BUILD_TYPE=Release && ninja diff --git a/external/hash/initbuild_debug.sh b/external/hash/initbuild_debug.sh new file mode 100755 index 0000000..d190139 --- /dev/null +++ b/external/hash/initbuild_debug.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +cd `dirname $0` +mkdir -p "build/debug" +cd build/debug && cmake -GNinja ../.. -DCMAKE_BUILD_TYPE=Debug && ninja diff --git a/external/hash/int_set.h b/external/hash/int_set.h new file mode 100644 index 0000000..b873ef9 --- /dev/null +++ b/external/hash/int_set.h @@ -0,0 +1,50 @@ +#ifndef INT_SET_H +#define INT_SET_H + +#include "ptr_set.h" + +/* + * The values 0, 1, and 2 are reserved so we map integers + * before casting them to void *. + * + * Instead we disallow the largest positive integers. + * + * This is specfic to the implementation of ptr_set, so + * if it changes, we may have to change here as well. + */ + +#define HT_INT_SET_OFFSET ((1 << (8 * sizeof(int) - 1)) - 2) +#define HT_INT_TO_PTR(x) ((void *)(size_t)((x) - HT_INT_SET_OFFSET)) +#define HT_PTR_TO_INT(x) ((int)(size_t)(x) + HT_INT_SET_OFFSET) + +/* Return value helpers. */ +#define INT_SET_IS_MISSING(x) (HT_PTR_SET_MISSING(HT_INT_TO_PTR(x))) +#define INT_SET_IS_ERROR(x) (HT_PTR_SET_IS_ERROR(HT_INT_TO_PTR(x))) +#define INT_SET_IS_VALID(x) (HT_PTR_SET_IS_VALID(HT_INT_TO_PTR(x))) + +typedef ptr_set_t int_set_t; + +/* Returns 1 if already present, 0 otherwise. */ +static inline int int_set_add(int_set_t *S, int x) +{ + return ptr_set_insert_item(S, HT_INT_TO_PTR(x), ht_keep) != 0; +} + +/* Returns 1 if removed, 0 otherwise. */ +static inline int int_set_remove(int_set_t *S, int x) +{ + return ptr_set_remove_item(S, HT_INT_TO_PTR(x)) != 0; +} + +static inline int int_set_count(int_set_t *S) +{ + return ptr_set_count(S); +} + +/* Returns 1 if present, 0 otherwise. */ +static inline int int_set_exists(int_set_t *S, int x) +{ + return ptr_set_exists(S, HT_INT_TO_PTR(x)); +} + +#endif /* INT_SET_H */ diff --git a/external/hash/load_test.c b/external/hash/load_test.c new file mode 100644 index 0000000..1c3d0e7 --- /dev/null +++ b/external/hash/load_test.c @@ -0,0 +1,86 @@ +#include <assert.h> +#include <sys/time.h> +#include <stdio.h> + +//#define INT_SET_PRIVATE +#ifdef INT_SET_PRIVATE +/* Make all hash functions private to this module for better + * performance. This may not be necessary depending on compiler + * optimizations. clang 4.2 -O3 benefits while -O4 figures it and get + * same speed with external linkage. */ +#define HT_PRIVATE +#include "int_set.h" +#include "ptr_set.c" +#undef HT_PRIVATE +#else +/* Use external linkage. Link with ptr_set.c which int_set depends upon. */ +#include "int_set.h" +#endif + +struct timeval time_diff(struct timeval start, struct timeval end) +{ + struct timeval temp; + if ((end.tv_usec-start.tv_usec)<0) { + temp.tv_sec = end.tv_sec-start.tv_sec-1; + temp.tv_usec = 1000000+end.tv_usec-start.tv_usec; + } else { + temp.tv_sec = end.tv_sec-start.tv_sec; + temp.tv_usec = end.tv_usec-start.tv_usec; + } + return temp; +} + +double elapsed_ms(struct timeval td) +{ + return (double)td.tv_sec * 1000 + (double)td.tv_usec / 1000; +} + +void test_int_set() +{ + int i, x; + const int N = 1000000; + //const int N = 1000; + int_set_t ht = {0}; + int_set_t *S = &ht; + double ms, nsop, opms; + struct timeval t1, t2, td; + + for (i = 1; i <= N; ++i) { + int_set_add(S, i); + assert(int_set_exists(S, i)); + } + assert(int_set_count(S) == N); + + for (i = 1; i <= N; ++i) { + assert(int_set_exists(S, i)); + } + + gettimeofday(&t1, 0); + for (x = 0, i = 1; i <= N; ++i) { + x += int_set_exists(S, i); + } + gettimeofday(&t2, 0); + + td = time_diff(t1, t2); + ms = elapsed_ms(td); + + nsop = ms * 1000000 / x; + opms = (double)x / ms; + printf("%d out of %d keys found in time %0.03f ms or %0.01f ns per op\n", + x, N, ms, nsop); + printf("ops / ms: %0.0f\n", opms); + + for (i = 1; i <= N; ++i) { + assert(int_set_count(S) == N - i + 1); + assert(int_set_exists(S, i)); + int_set_remove(S, i); + assert(!int_set_exists(S, i)); + } + assert(int_set_count(S) == 0); +} + +int main(int argc, char *argv[]) +{ + test_int_set(); + return 0; +} diff --git a/external/hash/pstdint.h b/external/hash/pstdint.h new file mode 100644 index 0000000..14444aa --- /dev/null +++ b/external/hash/pstdint.h @@ -0,0 +1,898 @@ +/* A portable stdint.h + **************************************************************************** + * BSD License: + **************************************************************************** + * + * Copyright (c) 2005-2016 Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************** + * + * Version 0.1.15.2 + * + * The ANSI C standard committee, for the C99 standard, specified the + * inclusion of a new standard include file called stdint.h. This is + * a very useful and long desired include file which contains several + * very precise definitions for integer scalar types that is + * critically important for making portable several classes of + * applications including cryptography, hashing, variable length + * integer libraries and so on. But for most developers its likely + * useful just for programming sanity. + * + * The problem is that some compiler vendors chose to ignore the C99 + * standard and some older compilers have no opportunity to be updated. + * Because of this situation, simply including stdint.h in your code + * makes it unportable. + * + * So that's what this file is all about. Its an attempt to build a + * single universal include file that works on as many platforms as + * possible to deliver what stdint.h is supposed to. Even compilers + * that already come with stdint.h can use this file instead without + * any loss of functionality. A few things that should be noted about + * this file: + * + * 1) It is not guaranteed to be portable and/or present an identical + * interface on all platforms. The extreme variability of the + * ANSI C standard makes this an impossibility right from the + * very get go. Its really only meant to be useful for the vast + * majority of platforms that possess the capability of + * implementing usefully and precisely defined, standard sized + * integer scalars. Systems which are not intrinsically 2s + * complement may produce invalid constants. + * + * 2) There is an unavoidable use of non-reserved symbols. + * + * 3) Other standard include files are invoked. + * + * 4) This file may come in conflict with future platforms that do + * include stdint.h. The hope is that one or the other can be + * used with no real difference. + * + * 5) In the current verison, if your platform can't represent + * int32_t, int16_t and int8_t, it just dumps out with a compiler + * error. + * + * 6) 64 bit integers may or may not be defined. Test for their + * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. + * Note that this is different from the C99 specification which + * requires the existence of 64 bit support in the compiler. If + * this is not defined for your platform, yet it is capable of + * dealing with 64 bits then it is because this file has not yet + * been extended to cover all of your system's capabilities. + * + * 7) (u)intptr_t may or may not be defined. Test for its presence + * with the test: #ifdef PTRDIFF_MAX. If this is not defined + * for your platform, then it is because this file has not yet + * been extended to cover all of your system's capabilities, not + * because its optional. + * + * 8) The following might not been defined even if your platform is + * capable of defining it: + * + * WCHAR_MIN + * WCHAR_MAX + * (u)int64_t + * PTRDIFF_MIN + * PTRDIFF_MAX + * (u)intptr_t + * + * 9) The following have not been defined: + * + * WINT_MIN + * WINT_MAX + * + * 10) The criteria for defining (u)int_least(*)_t isn't clear, + * except for systems which don't have a type that precisely + * defined 8, 16, or 32 bit types (which this include file does + * not support anyways). Default definitions have been given. + * + * 11) The criteria for defining (u)int_fast(*)_t isn't something I + * would trust to any particular compiler vendor or the ANSI C + * committee. It is well known that "compatible systems" are + * commonly created that have very different performance + * characteristics from the systems they are compatible with, + * especially those whose vendors make both the compiler and the + * system. Default definitions have been given, but its strongly + * recommended that users never use these definitions for any + * reason (they do *NOT* deliver any serious guarantee of + * improved performance -- not in this file, nor any vendor's + * stdint.h). + * + * 12) The following macros: + * + * PRINTF_INTMAX_MODIFIER + * PRINTF_INT64_MODIFIER + * PRINTF_INT32_MODIFIER + * PRINTF_INT16_MODIFIER + * PRINTF_LEAST64_MODIFIER + * PRINTF_LEAST32_MODIFIER + * PRINTF_LEAST16_MODIFIER + * PRINTF_INTPTR_MODIFIER + * + * are strings which have been defined as the modifiers required + * for the "d", "u" and "x" printf formats to correctly output + * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, + * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. + * PRINTF_INTPTR_MODIFIER is not defined for some systems which + * provide their own stdint.h. PRINTF_INT64_MODIFIER is not + * defined if INT64_MAX is not defined. These are an extension + * beyond what C99 specifies must be in stdint.h. + * + * In addition, the following macros are defined: + * + * PRINTF_INTMAX_HEX_WIDTH + * PRINTF_INT64_HEX_WIDTH + * PRINTF_INT32_HEX_WIDTH + * PRINTF_INT16_HEX_WIDTH + * PRINTF_INT8_HEX_WIDTH + * PRINTF_INTMAX_DEC_WIDTH + * PRINTF_INT64_DEC_WIDTH + * PRINTF_INT32_DEC_WIDTH + * PRINTF_INT16_DEC_WIDTH + * PRINTF_UINT8_DEC_WIDTH + * PRINTF_UINTMAX_DEC_WIDTH + * PRINTF_UINT64_DEC_WIDTH + * PRINTF_UINT32_DEC_WIDTH + * PRINTF_UINT16_DEC_WIDTH + * PRINTF_UINT8_DEC_WIDTH + * + * Which specifies the maximum number of characters required to + * print the number of that type in either hexadecimal or decimal. + * These are an extension beyond what C99 specifies must be in + * stdint.h. + * + * Compilers tested (all with 0 warnings at their highest respective + * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 + * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio + * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 + * + * This file should be considered a work in progress. Suggestions for + * improvements, especially those which increase coverage are strongly + * encouraged. + * + * Acknowledgements + * + * The following people have made significant contributions to the + * development and testing of this file: + * + * Chris Howie + * John Steele Scott + * Dave Thorup + * John Dill + * Florian Wobbe + * Christopher Sean Morrison + * Mikkel Fahnoe Jorgensen + * + */ + +#include <stddef.h> +#include <limits.h> +#include <signal.h> + +/* + * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and + * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. + */ + +#if ((defined(_MSC_VER) && _MSC_VER >= 1600) || (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) +#include <stdint.h> +#define _PSTDINT_H_INCLUDED +# if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__)) && !(defined(__APPLE__) && defined(__MACH__)) +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "l" +# endif +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# else +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# ifndef PRINTF_INT32_MODIFIER +# if (UINT_MAX == UINT32_MAX) +# define PRINTF_INT32_MODIFIER "" +# else +# define PRINTF_INT32_MODIFIER "l" +# endif +# endif +# endif +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_UINT64_HEX_WIDTH +# define PRINTF_UINT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_UINT32_HEX_WIDTH +# define PRINTF_UINT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_UINT16_HEX_WIDTH +# define PRINTF_UINT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_UINT8_HEX_WIDTH +# define PRINTF_UINT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "19" +# endif +# ifndef PRINTF_UINT64_DEC_WIDTH +# define PRINTF_UINT64_DEC_WIDTH "20" +# endif +# ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_UINT32_DEC_WIDTH +# define PRINTF_UINT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_UINT16_DEC_WIDTH +# define PRINTF_UINT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_UINT8_DEC_WIDTH +# define PRINTF_UINT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_UINT64_HEX_WIDTH +# endif +# ifndef PRINTF_UINTMAX_HEX_WIDTH +# define PRINTF_UINTMAX_HEX_WIDTH PRINTF_UINT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_UINT64_DEC_WIDTH +# endif +# ifndef PRINTF_UINTMAX_DEC_WIDTH +# define PRINTF_UINTMAX_DEC_WIDTH PRINTF_UINT64_DEC_WIDTH +# endif + +/* + * Something really weird is going on with Open Watcom. Just pull some of + * these duplicated definitions from Open Watcom's stdint.h file for now. + */ + +# if defined (__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined (INT64_C) +# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) +# endif +# if !defined (UINT64_C) +# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) +# endif +# if !defined (INT32_C) +# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) +# endif +# if !defined (UINT32_C) +# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) +# endif +# if !defined (INT16_C) +# define INT16_C(x) (x) +# endif +# if !defined (UINT16_C) +# define UINT16_C(x) (x) +# endif +# if !defined (INT8_C) +# define INT8_C(x) (x) +# endif +# if !defined (UINT8_C) +# define UINT8_C(x) (x) +# endif +# if !defined (UINT64_MAX) +# define UINT64_MAX 18446744073709551615ULL +# endif +# if !defined (INT64_MAX) +# define INT64_MAX 9223372036854775807LL +# endif +# if !defined (UINT32_MAX) +# define UINT32_MAX 4294967295UL +# endif +# if !defined (INT32_MAX) +# define INT32_MAX 2147483647L +# endif +# if !defined (INTMAX_MAX) +# define INTMAX_MAX INT64_MAX +# endif +# if !defined (INTMAX_MIN) +# define INTMAX_MIN INT64_MIN +# endif +# endif +#endif + +#ifndef _PSTDINT_H_INCLUDED +#define _PSTDINT_H_INCLUDED + +#ifndef SIZE_MAX +# define SIZE_MAX (~(size_t)0) +#endif + +/* + * Deduce the type assignments from limits.h under the assumption that + * integer sizes in bits are powers of 2, and follow the ANSI + * definitions. + */ + +#ifndef UINT8_MAX +# define UINT8_MAX 0xff +#endif +#if !defined(uint8_t) && !defined(_UINT8_T) +# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) + typedef unsigned char uint8_t; +# define UINT8_C(v) ((uint8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef INT8_MAX +# define INT8_MAX 0x7f +#endif +#ifndef INT8_MIN +# define INT8_MIN INT8_C(0x80) +#endif +#if !defined(int8_t) && !defined(_INT8_T) +# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) + typedef signed char int8_t; +# define INT8_C(v) ((int8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef UINT16_MAX +# define UINT16_MAX 0xffff +#endif +#if !defined(uint16_t) && !defined(_UINT16_T) +#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) + typedef unsigned int uint16_t; +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +# define UINT16_C(v) ((uint16_t) (v)) +#elif (USHRT_MAX == UINT16_MAX) + typedef unsigned short uint16_t; +# define UINT16_C(v) ((uint16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT16_MAX +# define INT16_MAX 0x7fff +#endif +#ifndef INT16_MIN +# define INT16_MIN INT16_C(0x8000) +#endif +#if !defined(int16_t) && !defined(_INT16_T) +#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) + typedef signed int int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +#elif (SHRT_MAX == INT16_MAX) + typedef signed short int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (0xffffffffUL) +#endif +#if !defined(uint32_t) && !defined(_UINT32_T) +#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) + typedef unsigned long uint32_t; +# define UINT32_C(v) v ## UL +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (UINT_MAX == UINT32_MAX) + typedef unsigned int uint32_t; +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# define UINT32_C(v) v ## U +#elif (USHRT_MAX == UINT32_MAX) + typedef unsigned short uint32_t; +# define UINT32_C(v) ((unsigned short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT32_MAX +# define INT32_MAX (0x7fffffffL) +#endif +#ifndef INT32_MIN +# define INT32_MIN INT32_C(0x80000000) +#endif +#if !defined(int32_t) && !defined(_INT32_T) +#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) + typedef signed long int32_t; +# define INT32_C(v) v ## L +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (INT_MAX == INT32_MAX) + typedef signed int int32_t; +# define INT32_C(v) v +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#elif (SHRT_MAX == INT32_MAX) + typedef signed short int32_t; +# define INT32_C(v) ((short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +/* + * The macro stdint_int64_defined is temporarily used to record + * whether or not 64 integer support is available. It must be + * defined for any 64 integer extensions for new platforms that are + * added. + */ + +#undef stdint_int64_defined +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# endif +#endif + +#if !defined (stdint_int64_defined) +# if defined(__GNUC__) +# define stdint_int64_defined + __extension__ typedef long long int64_t; + __extension__ typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) +# define stdint_int64_defined + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; +# define UINT64_C(v) v ## UI64 +# define INT64_C(v) v ## I64 +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "I64" +# endif +# endif +#endif + +#if !defined (LONG_LONG_MAX) && defined (INT64_C) +# define LONG_LONG_MAX INT64_C (9223372036854775807) +#endif +#ifndef ULONG_LONG_MAX +# define ULONG_LONG_MAX UINT64_C (18446744073709551615) +#endif + +#if !defined (INT64_MAX) && defined (INT64_C) +# define INT64_MAX INT64_C (9223372036854775807) +#endif +#if !defined (INT64_MIN) && defined (INT64_C) +# define INT64_MIN INT64_C (-9223372036854775808) +#endif +#if !defined (UINT64_MAX) && defined (INT64_C) +# define UINT64_MAX UINT64_C (18446744073709551615) +#endif + +/* + * Width of hexadecimal for number field. + */ + +#ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +#endif +#ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +#endif +#ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +#endif +#ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +#endif +#ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "19" +#endif +#ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +#endif +#ifndef PRINTF_UINT64_DEC_WIDTH +# define PRINTF_UINT64_DEC_WIDTH "20" +#endif +#ifndef PRINTF_UINT32_DEC_WIDTH +# define PRINTF_UINT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_UINT16_DEC_WIDTH +# define PRINTF_UINT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_UINT8_DEC_WIDTH +# define PRINTF_UINT8_DEC_WIDTH "3" +#endif + +/* + * Ok, lets not worry about 128 bit integers for now. Moore's law says + * we don't need to worry about that until about 2040 at which point + * we'll have bigger things to worry about. + */ + +#ifdef stdint_int64_defined + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; +# define INTMAX_MAX INT64_MAX +# define INTMAX_MIN INT64_MIN +# define UINTMAX_MAX UINT64_MAX +# define UINTMAX_C(v) UINT64_C(v) +# define INTMAX_C(v) INT64_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif +#else + typedef int32_t intmax_t; + typedef uint32_t uintmax_t; +# define INTMAX_MAX INT32_MAX +# define UINTMAX_MAX UINT32_MAX +# define UINTMAX_C(v) UINT32_C(v) +# define INTMAX_C(v) INT32_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH +# endif +#endif + +/* + * Because this file currently only supports platforms which have + * precise powers of 2 as bit sizes for the default integers, the + * least definitions are all trivial. Its possible that a future + * version of this file could have different definitions. + */ + +#ifndef stdint_least_defined + typedef int8_t int_least8_t; + typedef uint8_t uint_least8_t; + typedef int16_t int_least16_t; + typedef uint16_t uint_least16_t; + typedef int32_t int_least32_t; + typedef uint32_t uint_least32_t; +# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER +# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER +# define UINT_LEAST8_MAX UINT8_MAX +# define INT_LEAST8_MAX INT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define INT_LEAST16_MAX INT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# ifdef stdint_int64_defined + typedef int64_t int_least64_t; + typedef uint64_t uint_least64_t; +# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER +# define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST64_MAX INT64_MAX +# define INT_LEAST64_MIN INT64_MIN +# endif +#endif +#undef stdint_least_defined + +/* + * The ANSI C committee pretending to know or specify anything about + * performance is the epitome of misguided arrogance. The mandate of + * this file is to *ONLY* ever support that absolute minimum + * definition of the fast integer types, for compatibility purposes. + * No extensions, and no attempt to suggest what may or may not be a + * faster integer type will ever be made in this file. Developers are + * warned to stay away from these types when using this or any other + * stdint.h. + */ + +typedef int_least8_t int_fast8_t; +typedef uint_least8_t uint_fast8_t; +typedef int_least16_t int_fast16_t; +typedef uint_least16_t uint_fast16_t; +typedef int_least32_t int_fast32_t; +typedef uint_least32_t uint_fast32_t; +#define UINT_FAST8_MAX UINT_LEAST8_MAX +#define INT_FAST8_MAX INT_LEAST8_MAX +#define UINT_FAST16_MAX UINT_LEAST16_MAX +#define INT_FAST16_MAX INT_LEAST16_MAX +#define UINT_FAST32_MAX UINT_LEAST32_MAX +#define INT_FAST32_MAX INT_LEAST32_MAX +#define INT_FAST8_MIN INT_LEAST8_MIN +#define INT_FAST16_MIN INT_LEAST16_MIN +#define INT_FAST32_MIN INT_LEAST32_MIN +#ifdef stdint_int64_defined + typedef int_least64_t int_fast64_t; + typedef uint_least64_t uint_fast64_t; +# define UINT_FAST64_MAX UINT_LEAST64_MAX +# define INT_FAST64_MAX INT_LEAST64_MAX +# define INT_FAST64_MIN INT_LEAST64_MIN +#endif + +#undef stdint_int64_defined + +/* + * Whatever piecemeal, per compiler thing we can do about the wchar_t + * type limits. + */ + +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) +# include <wchar.h> +# ifndef WCHAR_MIN +# define WCHAR_MIN 0 +# endif +# ifndef WCHAR_MAX +# define WCHAR_MAX ((wchar_t)-1) +# endif +#endif + +/* + * Whatever piecemeal, per compiler/platform thing we can do about the + * (u)intptr_t types and limits. + */ + +#if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T) +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +#ifndef STDINT_H_UINTPTR_T_DEFINED +# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__) +# define stdint_intptr_bits 64 +# elif defined (__WATCOMC__) || defined (__TURBOC__) +# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) +# define stdint_intptr_bits 16 +# else +# define stdint_intptr_bits 32 +# endif +# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__) +# define stdint_intptr_bits 32 +# elif defined (__INTEL_COMPILER) +/* TODO -- what did Intel do about x86-64? */ +# else +/* #error "This platform might not be supported yet" */ +# endif + +# ifdef stdint_intptr_bits +# define stdint_intptr_glue3_i(a,b,c) a##b##c +# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) +# ifndef PRINTF_INTPTR_MODIFIER +# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) +# endif +# ifndef PTRDIFF_MAX +# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef PTRDIFF_MIN +# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef UINTPTR_MAX +# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MAX +# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MIN +# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef INTPTR_C +# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) +# endif +# ifndef UINTPTR_C +# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) +# endif + typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; + typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; +# else +/* TODO -- This following is likely wrong for some platforms, and does + nothing for the definition of uintptr_t. */ + typedef ptrdiff_t intptr_t; +# endif +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +/* + * Assumes sig_atomic_t is signed and we have a 2s complement machine. + */ + +#ifndef SIG_ATOMIC_MAX +# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) +#endif + +#endif + +#if defined (__TEST_PSTDINT_FOR_CORRECTNESS) + +/* + * Please compile with the maximum warning settings to make sure macros are + * not defined more than once. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define glue3_aux(x,y,z) x ## y ## z +#define glue3(x,y,z) glue3_aux(x,y,z) + +#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0); +#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0); + +#define DECL(us,bits) glue3(DECL,us,) (bits) + +#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits) + +#define REPORTERROR(msg) { err_n++; if (err_first <= 0) err_first = __LINE__; printf msg; } + +int main () { + int err_n = 0; + int err_first = 0; + DECL(I,8) + DECL(U,8) + DECL(I,16) + DECL(U,16) + DECL(I,32) + DECL(U,32) +#ifdef INT64_MAX + DECL(I,64) + DECL(U,64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf (str0, "%" PRINTF_INT32_MODIFIER "d", INT32_C(2147483647)); + if (0 != strcmp (str0, "2147483647")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str0)); + if (atoi(PRINTF_INT32_DEC_WIDTH) != (int) strlen(str0)) REPORTERROR (("Something wrong with PRINTF_INT32_DEC_WIDTH : %s\n", PRINTF_INT32_DEC_WIDTH)); + sprintf (str0, "%" PRINTF_INT32_MODIFIER "u", UINT32_C(4294967295)); + if (0 != strcmp (str0, "4294967295")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str0)); + if (atoi(PRINTF_UINT32_DEC_WIDTH) != (int) strlen(str0)) REPORTERROR (("Something wrong with PRINTF_UINT32_DEC_WIDTH : %s\n", PRINTF_UINT32_DEC_WIDTH)); +#ifdef INT64_MAX + sprintf (str1, "%" PRINTF_INT64_MODIFIER "d", INT64_C(9223372036854775807)); + if (0 != strcmp (str1, "9223372036854775807")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str1)); + if (atoi(PRINTF_INT64_DEC_WIDTH) != (int) strlen(str1)) REPORTERROR (("Something wrong with PRINTF_INT64_DEC_WIDTH : %s, %d\n", PRINTF_INT64_DEC_WIDTH, (int) strlen(str1))); + sprintf (str1, "%" PRINTF_INT64_MODIFIER "u", UINT64_C(18446744073709550591)); + if (0 != strcmp (str1, "18446744073709550591")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str1)); + if (atoi(PRINTF_UINT64_DEC_WIDTH) != (int) strlen(str1)) REPORTERROR (("Something wrong with PRINTF_UINT64_DEC_WIDTH : %s, %d\n", PRINTF_UINT64_DEC_WIDTH, (int) strlen(str1))); +#endif + + sprintf (str0, "%d %x\n", 0, ~0); + + sprintf (str1, "%d %x\n", i8, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i8 : %s\n", str1)); + sprintf (str1, "%u %x\n", u8, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u8 : %s\n", str1)); + sprintf (str1, "%d %x\n", i16, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i16 : %s\n", str1)); + sprintf (str1, "%u %x\n", u16, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u16 : %s\n", str1)); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i32 : %s\n", str1)); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u32 : %s\n", str1)); +#ifdef INT64_MAX + sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i64 : %s\n", str1)); +#endif + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with imax : %s\n", str1)); + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with umax : %s\n", str1)); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); +#ifdef INT64_MAX + TESTUMAX(64); +#endif + +#define STR(v) #v +#define Q(v) printf ("sizeof " STR(v) " = %u\n", (unsigned) sizeof (v)); + if (err_n) { + printf ("pstdint.h is not correct. Please use sizes below to correct it:\n"); + } + + Q(int) + Q(unsigned) + Q(long int) + Q(short int) + Q(int8_t) + Q(int16_t) + Q(int32_t) +#ifdef INT64_MAX + Q(int64_t) +#endif + + return EXIT_SUCCESS; +} + +#endif diff --git a/external/hash/ptr_set.c b/external/hash/ptr_set.c new file mode 100644 index 0000000..ab12ddf --- /dev/null +++ b/external/hash/ptr_set.c @@ -0,0 +1,60 @@ +/* + * Creates a set of stored pointers by using the pointer itself as key. + * + * (void *)0 (HT_MISSING) cannot be stored. + * (void *)1 (HT_DELETED) also cannot be stored. + * + * ht_item, ht_key, ht_key_len, and ht_match are required. + * + * In this case HT_HASH_FUNCTION is also required because + * we do not read the content of the key but use the pointer + * itself as a key. The default behavior would crash. + * + * Only one hash table can be defined in a single compilation unit + * because of static function names in the generic implementation. + */ + +#include "ptr_set.h" + +static inline size_t ptr_set_hash_function(const void *s, size_t len); +#define HT_HASH_FUNCTION ptr_set_hash_function + +#define HT_LOAD_FACTOR 0.7 +#include "hash_table_def.h" +DEFINE_HASH_TABLE(ptr_set) + +#if defined(PTR_SET_RH) +#include "hash_table_impl_rh.h" +#else +#include "hash_table_impl.h" +#endif + +static inline const void *ht_key(ht_item_t x) +{ + return (const void *)x; +} + +static inline size_t ht_key_len(ht_item_t x) +{ + return sizeof(x); +} + +static inline int ht_match(const void *key, size_t len, ht_item_t x) +{ + (void)len; + return (size_t)key == (size_t)x; +} + +static inline size_t ptr_set_hash_function(const void *s, size_t len) +{ +#if defined (PTR_SET_PTR_HASH) + /* Murmur hash like finalization step. */ + return ht_ptr_hash_function(s, len); +#elif defined (PTR_SET_INT_HASH) + /* Knuths multiplication. */ + return ht_int_hash_function(s, len); +#else + (void)len; + return ht_default_hash_function(&s, sizeof(char *)); +#endif +} diff --git a/external/hash/ptr_set.h b/external/hash/ptr_set.h new file mode 100644 index 0000000..f66e70e --- /dev/null +++ b/external/hash/ptr_set.h @@ -0,0 +1,19 @@ +#ifndef HT_PTR_SET_H +#define HT_PTR_SET_H + +#include "hash_table.h" + +DECLARE_HASH_TABLE(ptr_set, void *) + +/* Return value helpers - these are specific to the implementation. */ +#define PTR_SET_IS_MISSING(x) ((void *)x == (void *)0) +#define PTR_SET_IS_ERROR(x) ((void *)x == (void *)2) +#define PTR_SET_IS_VALID(x) ((void *)x > (void *)2) + +/* Extensions to std. interface. */ +static inline int ptr_set_exists(ptr_set_t *S, void *p) +{ + return ptr_set_find_item(S, p) != (void *)0; +} + +#endif /* HT_PTR_SET_H */ diff --git a/external/hash/str_set.c b/external/hash/str_set.c new file mode 100644 index 0000000..87a3766 --- /dev/null +++ b/external/hash/str_set.c @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> + +#include "str_set.h" +#include "hash_table_def.h" +DEFINE_HASH_TABLE(str_set) +#if defined(STR_SET_RH) +#include "hash_table_impl_rh.h" +#else +#include "hash_table_impl.h" +#endif + +/* + * Simple default implementation of a hash set. The stored items are + * zero-terminated strings. The hash table does not manage the + * allocation of the strings, like it doesn't manage any stored items. + * However, it items are created with, say, strndup, a destructor can be + * provided to free each item when clearing the table. The remove + * operation also returns the removed item so it can be deallocated by + * callee. + * + * In general, the key and the item are different, but here they are the + * same. Normally the key would be referenced by the item. + */ +static inline int ht_match(const void *key, size_t len, str_set_item_t item) +{ + return strncmp(key, item, len) == 0; +} + +static inline const void *ht_key(str_set_item_t item) +{ + return (const void *)item; +} + +static inline size_t ht_key_len(str_set_item_t item) +{ + return strlen(item); +} diff --git a/external/hash/str_set.h b/external/hash/str_set.h new file mode 100644 index 0000000..df5d1c7 --- /dev/null +++ b/external/hash/str_set.h @@ -0,0 +1,32 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef STR_SET_H +#define STR_SET_H + +#include "hash_table.h" + +DECLARE_HASH_TABLE(str_set, char *) + +#endif /* STR_SET_H */ diff --git a/external/hash/token_map.c b/external/hash/token_map.c new file mode 100644 index 0000000..9bf85df --- /dev/null +++ b/external/hash/token_map.c @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> + +/* These are just example settings. */ + +#include "token_map.h" +#define HT_LOAD_FACTOR 0.85 +/* Quadratic probing is ignored with Robin Hood hashing. */ +#define HT_PROBE_QUADRATIC +#include "hash_table_def.h" +DEFINE_HASH_TABLE(token_map) +#if defined(TOKEN_MAP_RH) +#include "hash_table_impl_rh.h" +#else +#include "hash_table_impl.h" +#endif + +static inline const void *ht_key(ht_item_t item) +{ + return item->token; +} + +static inline size_t ht_key_len(ht_item_t item) +{ + return item->len; +} + +static inline int ht_match(const void *key, size_t len, ht_item_t item) +{ + return len == item->len && memcmp(key, item->token, len) == 0; +} diff --git a/external/hash/token_map.h b/external/hash/token_map.h new file mode 100644 index 0000000..700c60e --- /dev/null +++ b/external/hash/token_map.h @@ -0,0 +1,39 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TOKEN_MAP_H +#define TOKEN_MAP_H + +#include "hash_table.h" + +struct token { + char *token; + size_t len; + int type; + void *data; +}; + +DECLARE_HASH_TABLE(token_map, struct token *) + +#endif /* TOKEN_MAP_H */ diff --git a/external/hash/unaligned.h b/external/hash/unaligned.h new file mode 100644 index 0000000..0431f96 --- /dev/null +++ b/external/hash/unaligned.h @@ -0,0 +1,42 @@ +#ifndef UNALIGNED_H +#define UNALIGNED_H + +/* + * This is a simplified version of portable/punaligned.h that does not depend on + * endian detection, but which assumes x86 is always little endian. + * Include the portable version for better precision. + */ + +#ifndef unaligned_read_le16toh + +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) + +#define unaligned_read_le16toh(p) (*(uint16_t*)(p)) +#define unaligned_read_le32toh(p) (*(uint32_t*)(p)) +#define unaligned_read_le64toh(p) (*(uint64_t*)(p)) + +#else + +#define unaligned_read_le16toh(p) ( \ + (((uint16_t)(((uint8_t *)(p))[0])) << 0) | \ + (((uint16_t)(((uint8_t *)(p))[1])) << 8)) + +#define unaligned_read_le32toh(p) ( \ + (((uint32_t)(((uint8_t *)(p))[0])) << 0) | \ + (((uint32_t)(((uint8_t *)(p))[1])) << 8) | \ + (((uint32_t)(((uint8_t *)(p))[2])) << 16) | \ + (((uint32_t)(((uint8_t *)(p))[3])) << 24)) + +#define unaligned_read_le64toh(p) ( \ + (((uint64_t)(((uint8_t *)(p))[0])) << 0) | \ + (((uint64_t)(((uint8_t *)(p))[1])) << 8) | \ + (((uint64_t)(((uint8_t *)(p))[2])) << 16) | \ + (((uint64_t)(((uint8_t *)(p))[3])) << 24) | \ + (((uint64_t)(((uint8_t *)(p))[4])) << 32) | \ + (((uint64_t)(((uint8_t *)(p))[5])) << 40) | \ + (((uint64_t)(((uint8_t *)(p))[6])) << 48) | \ + (((uint64_t)(((uint8_t *)(p))[7])) << 56)) +#endif +#endif + +#endif /* UNALIGNED_H */ |