aboutsummaryrefslogtreecommitdiff
path: root/external
diff options
context:
space:
mode:
Diffstat (limited to 'external')
-rw-r--r--external/grisu3/.gitignore1
-rw-r--r--external/grisu3/LICENSE14
-rw-r--r--external/grisu3/README.md9
-rw-r--r--external/grisu3/grisu3_math.h329
-rw-r--r--external/grisu3/grisu3_parse.h582
-rw-r--r--external/grisu3/grisu3_print.h265
-rw-r--r--external/grisu3/grisu3_test.c141
-rw-r--r--external/grisu3/grisu3_test_dblcnv.c482
-rwxr-xr-xexternal/grisu3/test.sh18
-rwxr-xr-xexternal/grisu3/test_dblcnv.sh15
-rw-r--r--external/hash/.gitignore1
-rw-r--r--external/hash/CMakeLists.txt38
-rw-r--r--external/hash/LICENSE28
-rw-r--r--external/hash/PMurHash.c334
-rw-r--r--external/hash/PMurHash.h64
-rw-r--r--external/hash/README.md158
-rw-r--r--external/hash/cmetrohash.h78
-rw-r--r--external/hash/cmetrohash64.c185
-rw-r--r--external/hash/hash.h115
-rw-r--r--external/hash/hash_table.h266
-rw-r--r--external/hash/hash_table_def.h154
-rw-r--r--external/hash/hash_table_impl.h233
-rw-r--r--external/hash/hash_table_impl_rh.h360
-rw-r--r--external/hash/hash_test.c419
-rw-r--r--external/hash/ht32.c47
-rw-r--r--external/hash/ht32.h36
-rw-r--r--external/hash/ht32rh.c47
-rw-r--r--external/hash/ht32rh.h36
-rw-r--r--external/hash/ht64.c47
-rw-r--r--external/hash/ht64.h36
-rw-r--r--external/hash/ht64rh.c47
-rw-r--r--external/hash/ht64rh.h36
-rw-r--r--external/hash/ht_hash_function.h258
-rw-r--r--external/hash/ht_portable.h9
-rw-r--r--external/hash/ht_trace.h59
-rwxr-xr-xexternal/hash/initbuild.sh5
-rwxr-xr-xexternal/hash/initbuild_debug.sh5
-rw-r--r--external/hash/int_set.h50
-rw-r--r--external/hash/load_test.c86
-rw-r--r--external/hash/pstdint.h898
-rw-r--r--external/hash/ptr_set.c60
-rw-r--r--external/hash/ptr_set.h19
-rw-r--r--external/hash/str_set.c61
-rw-r--r--external/hash/str_set.h32
-rw-r--r--external/hash/token_map.c54
-rw-r--r--external/hash/token_map.h39
-rw-r--r--external/hash/unaligned.h42
-rw-r--r--external/lex/LICENSE21
-rw-r--r--external/lex/README.md3
-rw-r--r--external/lex/luthor.c1509
-rw-r--r--external/lex/luthor.h472
-rw-r--r--external/lex/tokens.h554
52 files changed, 8857 insertions, 0 deletions
diff --git a/external/grisu3/.gitignore b/external/grisu3/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/external/grisu3/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/external/grisu3/LICENSE b/external/grisu3/LICENSE
new file mode 100644
index 0000000..bb7ca57
--- /dev/null
+++ b/external/grisu3/LICENSE
@@ -0,0 +1,14 @@
+Copyright (c) 2016 Mikkel F. Jørgensen, dvide.com
+Some files also Copyright author of MathGeoLib (https://github.com/juj)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. http://www.apache.org/licenses/LICENSE-2.0
diff --git a/external/grisu3/README.md b/external/grisu3/README.md
new file mode 100644
index 0000000..5f5c62e
--- /dev/null
+++ b/external/grisu3/README.md
@@ -0,0 +1,9 @@
+Implements the grisu3 floating point printing and parsing algorithm
+based on earlier work:
+
+- <http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>
+- <https://github.com/google/double-conversion>
+- <https://github.com/juj/MathGeoLib/blob/master/src/Math/grisu3.c>
+- <http://www.exploringbinary.com/quick-and-dirty-floating-point-to-decimal-conversion/>
+
+
diff --git a/external/grisu3/grisu3_math.h b/external/grisu3/grisu3_math.h
new file mode 100644
index 0000000..cff6e8c
--- /dev/null
+++ b/external/grisu3/grisu3_math.h
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2016 Mikkel F. Jørgensen, dvide.com
+ * Copyright author of MathGeoLib (https://github.com/juj)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+/* 2016-02-02: Updated by mikkelfj
+ *
+ * Extracted from MathGeoLib grisu3.c, Apache 2.0 license, and extended.
+ *
+ * This file is usually included via grisu3_print.h or grisu3_parse.h.
+ *
+ * The original MathGeoLib dtoa_grisu3 implementation is largely
+ * unchanged except for the uint64 to double cast. The remaining changes
+ * are file structure, name changes, and new additions for parsing:
+ *
+ * - Split into header files only:
+ * grisu3_math.h, grisu3_print.h, (added grisu3_parse.h)
+ *
+ * - names prefixed with grisu3_, grisu3_diy_fp_, GRISU3_.
+ * - added static to all functions.
+ * - disabled clang unused function warnings.
+ * - guarded <stdint.h> to allow for alternative impl.
+ * - added extra numeric constants needed for parsing.
+ * - added dec_pow, cast_double_from_diy_fp.
+ * - changed some function names for consistency.
+ * - moved printing specific grisu3 functions to grisu3_print.h.
+ * - changed double to uint64 cast to avoid aliasing.
+ * - added new grisu3_parse.h for parsing doubles.
+ * - grisu3_print_double (dtoa_grisu3) format .1 as 0.1 needed for valid JSON output
+ * and grisu3_parse_double wouldn't consume it.
+ * - grisu3_print_double changed formatting to prefer 0.012 over 1.2e-2.
+ *
+ * These changes make it possible to include the files as headers only
+ * in other software libraries without risking name conflicts, and to
+ * extend the implementation with a port of Google's Double Conversion
+ * strtod functionality for parsing doubles.
+ *
+ * Extracted from: rev. 915501a / Dec 22, 2015
+ * <https://github.com/juj/MathGeoLib/blob/master/src/Math/grisu3.c>
+ * MathGeoLib License: http://www.apache.org/licenses/LICENSE-2.0.html
+ */
+
+#ifndef GRISU3_MATH_H
+#define GRISU3_MATH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Guarded to allow inclusion of pstdint.h first, if stdint.h is not supported. */
+#ifndef UINT8_MAX
+#include <stdint.h> /* uint64_t etc. */
+#endif
+
+#ifdef GRISU3_NO_ASSERT
+#undef GRISU3_ASSERT
+#define GRISU3_ASSERT(x) ((void)0)
+#endif
+
+#ifndef GRISU3_ASSERT
+#include <assert.h> /* assert */
+#define GRISU3_ASSERT(x) assert(x)
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer */
+#endif
+
+#define GRISU3_D64_SIGN 0x8000000000000000ULL
+#define GRISU3_D64_EXP_MASK 0x7FF0000000000000ULL
+#define GRISU3_D64_FRACT_MASK 0x000FFFFFFFFFFFFFULL
+#define GRISU3_D64_IMPLICIT_ONE 0x0010000000000000ULL
+#define GRISU3_D64_EXP_POS 52
+#define GRISU3_D64_EXP_BIAS 1075
+#define GRISU3_D64_DENORM_EXP (-GRISU3_D64_EXP_BIAS + 1)
+#define GRISU3_DIY_FP_FRACT_SIZE 64
+#define GRISU3_D_1_LOG2_10 0.30102999566398114 /* 1 / lg(10) */
+#define GRISU3_MIN_TARGET_EXP -60
+#define GRISU3_MASK32 0xFFFFFFFFULL
+#define GRISU3_MIN_CACHED_EXP -348
+#define GRISU3_MAX_CACHED_EXP 340
+#define GRISU3_CACHED_EXP_STEP 8
+#define GRISU3_D64_MAX_DEC_EXP 309
+#define GRISU3_D64_MIN_DEC_EXP -324
+#define GRISU3_D64_INF GRISU3_D64_EXP_MASK
+
+#define GRISU3_MIN(x,y) ((x) <= (y) ? (x) : (y))
+#define GRISU3_MAX(x,y) ((x) >= (y) ? (x) : (y))
+
+
+typedef struct grisu3_diy_fp
+{
+ uint64_t f; /* 64-bit significand */
+ int e; /* binary exponent; the represented value is f * 2^e */
+} grisu3_diy_fp_t;
+
+typedef struct grisu3_diy_fp_power
+{
+ uint64_t fract; /* 64-bit significand of a cached power of ten */
+ int16_t b_exp, d_exp; /* fract * 2^b_exp approximates 10^d_exp */
+} grisu3_diy_fp_power_t;
+
+typedef union { /* gives access to the same 64 bits as an integer or as a double */
+ uint64_t u64;
+ double d64;
+} grisu3_cast_double_t;
+
+/* Returns the 64-bit pattern of `d`, read back through the cast union. */
+static uint64_t grisu3_cast_uint64_from_double(double d)
+{
+    grisu3_cast_double_t pun;
+
+    pun.d64 = d;
+    return pun.u64;
+}
+
+/* Returns the double whose 64-bit pattern is `u`, read back through the cast union. */
+static double grisu3_cast_double_from_uint64(uint64_t u)
+{
+    grisu3_cast_double_t pun;
+
+    pun.u64 = u;
+    return pun.d64;
+}
+
+#define grisu3_double_infinity grisu3_cast_double_from_uint64(GRISU3_D64_INF)
+#define grisu3_double_nan grisu3_cast_double_from_uint64(GRISU3_D64_INF + 1)
+
+static const grisu3_diy_fp_power_t grisu3_diy_fp_pow_cache[] =
+{
+ { 0xfa8fd5a0081c0288ULL, -1220, -348 },
+ { 0xbaaee17fa23ebf76ULL, -1193, -340 },
+ { 0x8b16fb203055ac76ULL, -1166, -332 },
+ { 0xcf42894a5dce35eaULL, -1140, -324 },
+ { 0x9a6bb0aa55653b2dULL, -1113, -316 },
+ { 0xe61acf033d1a45dfULL, -1087, -308 },
+ { 0xab70fe17c79ac6caULL, -1060, -300 },
+ { 0xff77b1fcbebcdc4fULL, -1034, -292 },
+ { 0xbe5691ef416bd60cULL, -1007, -284 },
+ { 0x8dd01fad907ffc3cULL, -980, -276 },
+ { 0xd3515c2831559a83ULL, -954, -268 },
+ { 0x9d71ac8fada6c9b5ULL, -927, -260 },
+ { 0xea9c227723ee8bcbULL, -901, -252 },
+ { 0xaecc49914078536dULL, -874, -244 },
+ { 0x823c12795db6ce57ULL, -847, -236 },
+ { 0xc21094364dfb5637ULL, -821, -228 },
+ { 0x9096ea6f3848984fULL, -794, -220 },
+ { 0xd77485cb25823ac7ULL, -768, -212 },
+ { 0xa086cfcd97bf97f4ULL, -741, -204 },
+ { 0xef340a98172aace5ULL, -715, -196 },
+ { 0xb23867fb2a35b28eULL, -688, -188 },
+ { 0x84c8d4dfd2c63f3bULL, -661, -180 },
+ { 0xc5dd44271ad3cdbaULL, -635, -172 },
+ { 0x936b9fcebb25c996ULL, -608, -164 },
+ { 0xdbac6c247d62a584ULL, -582, -156 },
+ { 0xa3ab66580d5fdaf6ULL, -555, -148 },
+ { 0xf3e2f893dec3f126ULL, -529, -140 },
+ { 0xb5b5ada8aaff80b8ULL, -502, -132 },
+ { 0x87625f056c7c4a8bULL, -475, -124 },
+ { 0xc9bcff6034c13053ULL, -449, -116 },
+ { 0x964e858c91ba2655ULL, -422, -108 },
+ { 0xdff9772470297ebdULL, -396, -100 },
+ { 0xa6dfbd9fb8e5b88fULL, -369, -92 },
+ { 0xf8a95fcf88747d94ULL, -343, -84 },
+ { 0xb94470938fa89bcfULL, -316, -76 },
+ { 0x8a08f0f8bf0f156bULL, -289, -68 },
+ { 0xcdb02555653131b6ULL, -263, -60 },
+ { 0x993fe2c6d07b7facULL, -236, -52 },
+ { 0xe45c10c42a2b3b06ULL, -210, -44 },
+ { 0xaa242499697392d3ULL, -183, -36 },
+ { 0xfd87b5f28300ca0eULL, -157, -28 },
+ { 0xbce5086492111aebULL, -130, -20 },
+ { 0x8cbccc096f5088ccULL, -103, -12 },
+ { 0xd1b71758e219652cULL, -77, -4 },
+ { 0x9c40000000000000ULL, -50, 4 },
+ { 0xe8d4a51000000000ULL, -24, 12 },
+ { 0xad78ebc5ac620000ULL, 3, 20 },
+ { 0x813f3978f8940984ULL, 30, 28 },
+ { 0xc097ce7bc90715b3ULL, 56, 36 },
+ { 0x8f7e32ce7bea5c70ULL, 83, 44 },
+ { 0xd5d238a4abe98068ULL, 109, 52 },
+ { 0x9f4f2726179a2245ULL, 136, 60 },
+ { 0xed63a231d4c4fb27ULL, 162, 68 },
+ { 0xb0de65388cc8ada8ULL, 189, 76 },
+ { 0x83c7088e1aab65dbULL, 216, 84 },
+ { 0xc45d1df942711d9aULL, 242, 92 },
+ { 0x924d692ca61be758ULL, 269, 100 },
+ { 0xda01ee641a708deaULL, 295, 108 },
+ { 0xa26da3999aef774aULL, 322, 116 },
+ { 0xf209787bb47d6b85ULL, 348, 124 },
+ { 0xb454e4a179dd1877ULL, 375, 132 },
+ { 0x865b86925b9bc5c2ULL, 402, 140 },
+ { 0xc83553c5c8965d3dULL, 428, 148 },
+ { 0x952ab45cfa97a0b3ULL, 455, 156 },
+ { 0xde469fbd99a05fe3ULL, 481, 164 },
+ { 0xa59bc234db398c25ULL, 508, 172 },
+ { 0xf6c69a72a3989f5cULL, 534, 180 },
+ { 0xb7dcbf5354e9beceULL, 561, 188 },
+ { 0x88fcf317f22241e2ULL, 588, 196 },
+ { 0xcc20ce9bd35c78a5ULL, 614, 204 },
+ { 0x98165af37b2153dfULL, 641, 212 },
+ { 0xe2a0b5dc971f303aULL, 667, 220 },
+ { 0xa8d9d1535ce3b396ULL, 694, 228 },
+ { 0xfb9b7cd9a4a7443cULL, 720, 236 },
+ { 0xbb764c4ca7a44410ULL, 747, 244 },
+ { 0x8bab8eefb6409c1aULL, 774, 252 },
+ { 0xd01fef10a657842cULL, 800, 260 },
+ { 0x9b10a4e5e9913129ULL, 827, 268 },
+ { 0xe7109bfba19c0c9dULL, 853, 276 },
+ { 0xac2820d9623bf429ULL, 880, 284 },
+ { 0x80444b5e7aa7cf85ULL, 907, 292 },
+ { 0xbf21e44003acdd2dULL, 933, 300 },
+ { 0x8e679c2f5e44ff8fULL, 960, 308 },
+ { 0xd433179d9c8cb841ULL, 986, 316 },
+ { 0x9e19db92b4e31ba9ULL, 1013, 324 },
+ { 0xeb96bf6ebadf77d9ULL, 1039, 332 },
+ { 0xaf87023b9bf0ee6bULL, 1066, 340 }
+};
+
+/*
+ * Computes (int)ceil(v) without depending on the math library.
+ * `v` is expected to be within the range of int.
+ */
+static int grisu3_iceil(double v)
+{
+    const int truncated = (int)v;
+
+    /* The cast truncates toward zero, which already is the ceiling for negative input. */
+    if (v < 0) {
+        return truncated;
+    }
+    /* Non-negative input: bump by one unless v was a whole number. */
+    if (v - truncated == 0) {
+        return truncated;
+    }
+    return truncated + 1;
+}
+
+/*
+ * Selects an entry of grisu3_diy_fp_pow_cache from the binary exponent
+ * `exp`. The entry's significand and binary exponent are stored in `*p`
+ * and its decimal exponent is returned. No range check is made on the
+ * computed table index.
+ */
+static int grisu3_diy_fp_cached_pow(int exp, grisu3_diy_fp_t *p)
+{
+    const double scaled = (exp + GRISU3_DIY_FP_FRACT_SIZE - 1) * GRISU3_D_1_LOG2_10;
+    const int dec_exp = grisu3_iceil(scaled);
+    const int slot = (dec_exp - GRISU3_MIN_CACHED_EXP - 1) / GRISU3_CACHED_EXP_STEP + 1;
+    const grisu3_diy_fp_power_t *entry = &grisu3_diy_fp_pow_cache[slot];
+
+    p->f = entry->fract;
+    p->e = entry->b_exp;
+    return entry->d_exp;
+}
+
+/*
+ * Returns x - y. Both operands must share the same exponent and x must
+ * not be smaller than y; this is only checked by GRISU3_ASSERT.
+ */
+static grisu3_diy_fp_t grisu3_diy_fp_minus(grisu3_diy_fp_t x, grisu3_diy_fp_t y)
+{
+    grisu3_diy_fp_t diff;
+
+    diff.e = x.e;
+    diff.f = x.f - y.f;
+    GRISU3_ASSERT(x.e == y.e && x.f >= y.f);
+    return diff;
+}
+
+/*
+ * Multiplies two diy_fp values. The 64x64 bit product is formed from
+ * four 32x32 bit partial products; only the upper 64 bits are kept,
+ * rounded by adding half of the discarded lower word. The result is
+ * not normalized.
+ */
+static grisu3_diy_fp_t grisu3_diy_fp_multiply(grisu3_diy_fp_t x, grisu3_diy_fp_t y)
+{
+    const uint64_t x_hi = x.f >> 32;
+    const uint64_t x_lo = x.f & GRISU3_MASK32;
+    const uint64_t y_hi = y.f >> 32;
+    const uint64_t y_lo = y.f & GRISU3_MASK32;
+    const uint64_t hi_hi = x_hi * y_hi;
+    const uint64_t lo_hi = x_lo * y_hi;
+    const uint64_t hi_lo = x_hi * y_lo;
+    const uint64_t lo_lo = x_lo * y_lo;
+    uint64_t mid;
+    grisu3_diy_fp_t product;
+
+    /* Sum everything that lands in bits 32..63 of the full product. */
+    mid = (lo_lo >> 32) + (hi_lo & GRISU3_MASK32) + (lo_hi & GRISU3_MASK32);
+    mid += 1U << 31; /* round */
+
+    product.f = hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (mid >> 32);
+    product.e = x.e + y.e + 64;
+    return product;
+}
+
+/*
+ * Shifts the significand left until its most significant bit is set,
+ * lowering the exponent by the same amount. The significand must not
+ * be zero; this is only checked by GRISU3_ASSERT.
+ */
+static grisu3_diy_fp_t grisu3_diy_fp_normalize(grisu3_diy_fp_t n)
+{
+    GRISU3_ASSERT(n.f != 0);
+    /* Coarse steps of ten bits while the top ten bits are all clear. */
+    while ((n.f & 0xFFC0000000000000ULL) == 0) {
+        n.f <<= 10;
+        n.e -= 10;
+    }
+    /* Single bit steps for the remainder. */
+    while ((n.f & GRISU3_D64_SIGN) == 0) {
+        n.f <<= 1;
+        n.e -= 1;
+    }
+    return n;
+}
+
+/*
+ * Splits a double into significand and binary exponent such that the
+ * magnitude equals f * 2^e. The sign bit is ignored. When the exponent
+ * field is zero, the stored fraction is used without the implicit
+ * leading one.
+ */
+static grisu3_diy_fp_t grisu3_cast_diy_fp_from_double(double d)
+{
+    const uint64_t bits = grisu3_cast_uint64_from_double(d);
+    const uint64_t exp_field = bits & GRISU3_D64_EXP_MASK;
+    grisu3_diy_fp_t fp;
+
+    fp.f = bits & GRISU3_D64_FRACT_MASK;
+    if (exp_field == 0) {
+        fp.e = 1 - GRISU3_D64_EXP_BIAS;
+    } else {
+        fp.f += GRISU3_D64_IMPLICIT_ONE;
+        fp.e = (int)(exp_field >> GRISU3_D64_EXP_POS) - GRISU3_D64_EXP_BIAS;
+    }
+    return fp;
+}
+
+/*
+ * Packs the non-negative value n.f * 2^n.e into a double.
+ *
+ * Significand bits that do not fit in 53 bits are truncated, not
+ * rounded, so callers must round before calling. Values below the
+ * denormal range become 0.0 and values whose exponent does not fit in
+ * the exponent field become +infinity rather than a corrupted bit
+ * pattern.
+ */
+static double grisu3_cast_double_from_diy_fp(grisu3_diy_fp_t n)
+{
+    const uint64_t hidden_bit = GRISU3_D64_IMPLICIT_ONE;
+    const uint64_t frac_mask = GRISU3_D64_FRACT_MASK;
+    const int denorm_exp = GRISU3_D64_DENORM_EXP;
+    const int exp_bias = GRISU3_D64_EXP_BIAS;
+    const int exp_pos = GRISU3_D64_EXP_POS;
+    /* An all-ones exponent field is reserved for infinity and NaN. */
+    const int max_biased_exp = (int)(GRISU3_D64_EXP_MASK >> GRISU3_D64_EXP_POS);
+
+    grisu3_diy_fp_t v = n;
+    uint64_t e_biased;
+
+    /* Zero has no hidden bit to find; avoid walking the exponent down. */
+    if (v.f == 0) {
+        return 0.0;
+    }
+    /* Reduce to at most 53 significant bits. */
+    while (v.f > hidden_bit + frac_mask) {
+        v.f >>= 1;
+        ++v.e;
+    }
+    if (v.e < denorm_exp) {
+        return 0.0;
+    }
+    /* Move the leading bit up to the hidden bit unless we hit the denormal floor. */
+    while (v.e > denorm_exp && (v.f & hidden_bit) == 0) {
+        v.f <<= 1;
+        --v.e;
+    }
+    /* Compare without adding the bias so a huge v.e cannot overflow int. */
+    if (v.e >= max_biased_exp - exp_bias) {
+        return grisu3_double_infinity;
+    }
+    if (v.e == denorm_exp && (v.f & hidden_bit) == 0) {
+        e_biased = 0;
+    } else {
+        e_biased = (uint64_t)(v.e + exp_bias);
+    }
+    return grisu3_cast_double_from_uint64((v.f & frac_mask) | (e_biased << exp_pos));
+}
+
+/* pow10_cache[i] = 10^(i-1) */
+static const unsigned int grisu3_pow10_cache[] = { 0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+
+/*
+ * Estimates an index into grisu3_pow10_cache from the bit count
+ * `n_bits`, steps one entry down if that entry exceeds `n`, stores the
+ * selected table value in `*power` and returns the index.
+ */
+static int grisu3_largest_pow10(uint32_t n, int n_bits, uint32_t *power)
+{
+    /* 1233 / 4096 approximates log10(2); the + 1 skips the first table entry. */
+    int idx = (((n_bits + 1) * 1233) >> 12) + 1;
+
+    /* We don't have any guarantees that 2^n_bits <= n. */
+    if (grisu3_pow10_cache[idx] > n) {
+        idx -= 1;
+    }
+    *power = grisu3_pow10_cache[idx];
+    return idx;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GRISU3_MATH_H */
diff --git a/external/grisu3/grisu3_parse.h b/external/grisu3/grisu3_parse.h
new file mode 100644
index 0000000..3d67c9a
--- /dev/null
+++ b/external/grisu3/grisu3_parse.h
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2016 Mikkel F. Jørgensen, dvide.com
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+/*
+ * Port of parts of Google Double Conversion strtod functionality
+ * but with fallback to strtod instead of a bignum implementation.
+ *
+ * Based on grisu3 math from MathGeoLib.
+ *
+ * See also grisu3_math.h comments.
+ */
+
+#ifndef GRISU3_PARSE_H
+#define GRISU3_PARSE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef UINT8_MAX
+#include <stdint.h>
+#endif
+
+#include <stdlib.h>
+#include <limits.h>
+
+#include "grisu3_math.h"
+
+
+/*
+ * The maximum number of characters a valid number may contain. The parse
+ * fails if the input length is longer but the character after max len
+ * was part of the number.
+ *
+ * The length should not be set too high because it protects against
+ * overflow in the exponent part derived from the input length.
+ */
+#define GRISU3_NUM_MAX_LEN 1000
+
+/*
+ * The lightweight "portable" C library recognizes grisu3 support if
+ * included first.
+ */
+#define grisu3_parse_double_is_defined 1
+
+/*
+ * Disable to compare performance and to test diy_fp algorithm in
+ * broader range.
+ */
+#define GRISU3_PARSE_FAST_CASE
+
+/* May result in a one off error, otherwise when uncertain, fall back to strtod. */
+//#define GRISU3_PARSE_ALLOW_ERROR
+
+
+/*
+ * The dec output exponent jumps in 8, so the result is offset at most
+ * by 7 when the input is within range.
+ */
+/*
+ * Looks up the cached power of ten at or just below the decimal
+ * exponent `d_exp`. The cached significand and binary exponent are
+ * stored in `*p` and the cached decimal exponent is returned. `d_exp`
+ * must lie within the table range; this is only checked by
+ * GRISU3_ASSERT.
+ */
+static int grisu3_diy_fp_cached_dec_pow(int d_exp, grisu3_diy_fp_t *p)
+{
+    const int step = GRISU3_CACHED_EXP_STEP;
+    const int bias = -GRISU3_MIN_CACHED_EXP;
+    const grisu3_diy_fp_power_t *entry;
+    int cached_exp;
+
+    GRISU3_ASSERT(GRISU3_MIN_CACHED_EXP <= d_exp);
+    GRISU3_ASSERT(d_exp < GRISU3_MAX_CACHED_EXP + step);
+
+    entry = &grisu3_diy_fp_pow_cache[(d_exp + bias) / step];
+    cached_exp = entry->d_exp;
+    p->f = entry->fract;
+    p->e = entry->b_exp;
+
+    GRISU3_ASSERT(cached_exp <= d_exp);
+    GRISU3_ASSERT(d_exp < cached_exp + step);
+
+    return cached_exp;
+}
+
+/*
+ * Ported from google double conversion strtod using
+ * MathGeoLibs diy_fp functions for grisu3 in C.
+ *
+ * ulp_half_error is set if we needed to truncate non-zero trailing
+ * characters.
+ *
+ * The actual value we need to encode is:
+ *
+ * (sign ? -1 : 1) * fraction * 10 ^ (exponent - fraction_exp)
+ * where exponent is the base 10 exponent assuming the decimal point is
+ * after the first digit. fraction_exp is the base 10 magnitude of the
+ * fraction or number of significant digits - 1.
+ *
+ * If the exponent is between 0 and 22 and the fraction is encoded in
+ * the lower 53 bits (the largest bit is implicit in a double, but not
+ * in this fraction), then the value can be trivially converted to
+ * double without loss of precision. If the fraction was in fact
+ * multiplied by trailing zeroes that we didn't convert to exponent,
+ * then there are values larger than 53 bits that can also be encoded
+ * trivially - but then it is better to handle this during parsing
+ * if it is worthwhile. We do not optimize for this here, because it
+ * can be done in a simple check before calling, and because it might
+ * not be worthwhile to do at all since it very likely will fail for
+ * numbers printed to be convertible back to double without loss.
+ *
+ * Returns 0 if conversion was not exact. In that case the value is
+ * either one smaller than the correct one, or the correct one.
+ *
+ * Exponents must be range protected before calling otherwise cached
+ * powers will blow up.
+ *
+ * Google Double Conversion seems to prefer the following notion:
+ *
+ * x >= 10^309 => +Inf
+ * x <= 10^-324 => 0,
+ *
+ * max double: HUGE_VAL = 1.7976931348623157 * 10^308
+ * min double: 4.9406564584124654 * 10^-324
+ *
+ * Values just below or above min/max representable number
+ * may round towards large/small non-Inf/non-neg values.
+ *
+ * but `strtod` seems to return +/-HUGE_VAL on overflow?
+ */
+/*
+ * Converts fraction * 10^(exponent - fraction_exp) to the nearest
+ * double using 64-bit diy_fp arithmetic while tracking an upper bound
+ * on the accumulated error.
+ *
+ * fraction       decimal digits read so far as an integer.
+ * exponent       decimal exponent as produced by the parser.
+ * fraction_exp   one less than the number of digits in `fraction`
+ *                (may be larger when the input had leading zeroes).
+ * ulp_half_error non-zero if digits were dropped while reading.
+ * result         receives the converted value (always non-negative).
+ *
+ * Returns non-zero if the result is known to be correctly rounded and
+ * 0 if the error bound straddles a rounding boundary, in which case
+ * `*result` may be off by one unit in the last place.
+ *
+ * exponent - fraction_exp must be within the cached power range
+ * [GRISU3_MIN_CACHED_EXP, GRISU3_MAX_CACHED_EXP + 7]; the caller is
+ * responsible for that.
+ *
+ * The rounding logic follows DiyFpStrtod in Google Double Conversion.
+ */
+static int grisu3_diy_fp_encode_double(uint64_t fraction, int exponent, int fraction_exp, int ulp_half_error, double *result)
+{
+    /*
+     * Error is measured in fractions of the least significant bit of
+     * the 64-bit significand, so we scale up to get some resolution to
+     * represent error expressions.
+     */
+    const int log2_error_one = 3;
+    const uint64_t error_one = (uint64_t)1 << log2_error_one;
+    const int denorm_exp = GRISU3_D64_DENORM_EXP;
+    const int exp_bias = GRISU3_D64_EXP_BIAS;
+    const int max_biased_exp = (int)(GRISU3_D64_EXP_MASK >> GRISU3_D64_EXP_POS);
+    /* Significand bits of a double including the implicit leading bit. */
+    const int d64_size = GRISU3_D64_EXP_POS + 1;
+    const uint64_t hidden_bit = GRISU3_D64_IMPLICIT_ONE;
+    const int diy_size = GRISU3_DIY_FP_FRACT_SIZE;
+    /* Any decimal number with this many digits fits in 64 bits. */
+    const int max_digits = 19;
+    /* Exact values of 10^1 .. 10^7 used between cached powers. */
+    static const grisu3_diy_fp_t cp_10_lut[] = {
+        { 0xa000000000000000ULL, -60 },
+        { 0xc800000000000000ULL, -57 },
+        { 0xfa00000000000000ULL, -54 },
+        { 0x9c40000000000000ULL, -50 },
+        { 0xc350000000000000ULL, -47 },
+        { 0xf424000000000000ULL, -44 },
+        { 0x9896800000000000ULL, -40 }
+    };
+
+    uint64_t error = ulp_half_error ? error_one / 2 : 0;
+    uint64_t prec_bits;
+    uint64_t half_way;
+    int d_exp = exponent - fraction_exp;
+    int a_exp;
+    int o_exp;
+    int mag;
+    int sig_size;
+    int prec;
+    int certain;
+    grisu3_diy_fp_t v;
+    grisu3_diy_fp_t cp;
+    grisu3_diy_fp_t rounded;
+
+    /* Zero cannot be normalized and needs no conversion. */
+    if (fraction == 0) {
+        *result = 0.0;
+        return 1;
+    }
+
+    /* Shift fraction to msb. */
+    v.f = fraction;
+    v.e = 0;
+    v = grisu3_diy_fp_normalize(v);
+    /* The half point error moves up while the exponent moves down. */
+    error <<= -v.e;
+
+    a_exp = grisu3_diy_fp_cached_dec_pow(d_exp, &cp);
+
+    /* Interpolate between cached powers at distance 8. */
+    if (a_exp != d_exp) {
+        const int adj_exp = d_exp - a_exp;
+
+        GRISU3_ASSERT(adj_exp >= 1 && adj_exp <= 7);
+        v = grisu3_diy_fp_multiply(v, cp_10_lut[adj_exp - 1]);
+
+        /*
+         * The adjustment power is exact, so the product is exact as
+         * long as it fits in 64 bits, which is guaranteed when the
+         * digit count of the fraction (1 + fraction_exp) plus the
+         * adjustment is at most 19. Otherwise the multiplication
+         * rounds and introduces 1/2 ulp of error.
+         */
+        if (1 + fraction_exp + adj_exp > max_digits) {
+            error += error_one / 2;
+        }
+    }
+
+    v = grisu3_diy_fp_multiply(v, cp);
+    /*
+     * Google double conversion states that the error introduced by a
+     * multiplication of a*b equals
+     *   error_a + error_b + error_a*error_b/2^64 + 0.5
+     * With a = input and b = cached power we have
+     *   error_b     = error_one/2 (cached powers are within 0.5 ulp),
+     *   error_ab    = 0 or 1 (rounded up to 1 if error_a != 0),
+     *   fixed_error = error_one/2.
+     * The conditional below is evaluated on the error before the
+     * addition takes place.
+     */
+    error += error_one + (error ? 1 : 0);
+
+    o_exp = v.e;
+    v = grisu3_diy_fp_normalize(v);
+    /* Again, if we shift the significant bits, the error moves along. */
+    error <<= o_exp - v.e;
+
+    /*
+     * The value `v` is bounded by 2^mag which is 64 + v.e because we
+     * just normalized it by shifting towards msb.
+     */
+    mag = diy_size + v.e;
+
+    /*
+     * Number of significand bits the IEEE double will have at this
+     * magnitude: 53 for normal numbers, fewer for denormals, and none
+     * below the denormal range.
+     */
+    if (mag >= denorm_exp + d64_size) {
+        sig_size = d64_size;
+    } else if (mag <= denorm_exp) {
+        sig_size = 0;
+    } else {
+        sig_size = mag - denorm_exp;
+    }
+    /* Bits of `v` that have to be rounded away; always at least 11. */
+    prec = diy_size - sig_size;
+    if (prec + log2_error_one >= diy_size) {
+        /*
+         * Deep denormal: drop a few bits up front so the scaled
+         * precision bits below cannot overflow 64 bits.
+         */
+        const int e_scale = prec + log2_error_one - diy_size + 1;
+
+        v.f >>= e_scale;
+        v.e += e_scale;
+        error = (error >> e_scale) + 1 + error_one;
+        prec -= e_scale;
+    }
+    rounded.f = v.f >> prec;
+    rounded.e = v.e + prec;
+    /* Compare the discarded bits against the half way point, both in error units. */
+    prec_bits = (v.f & (((uint64_t)1 << prec) - 1)) * error_one;
+    half_way = ((uint64_t)1 << (prec - 1)) * error_one;
+    if (prec_bits >= half_way + error) {
+        rounded.f++;
+        /* Prevent overflow. */
+        if (rounded.f & (hidden_bit << 1)) {
+            rounded.f >>= 1;
+            rounded.e += 1;
+        }
+    }
+    /*
+     * The result is only certain if the discarded bits are at least
+     * `error` away from the half way point on either side. Written
+     * without subtraction so the unsigned arithmetic cannot wrap.
+     */
+    certain = prec_bits + error <= half_way || prec_bits >= half_way + error;
+
+    if ((rounded.f & hidden_bit) && rounded.e >= max_biased_exp - exp_bias) {
+        /* Too large for the exponent field of a double. */
+        *result = grisu3_double_infinity;
+    } else {
+        *result = grisu3_cast_double_from_diy_fp(rounded);
+    }
+    return certain;
+}
+
+/*
+ * `end` is unchanged if number is handled natively, or it is the result
+ * of strtod parsing in case of fallback.
+ */
+/*
+ * Converts the parsed decimal number
+ *
+ *   (sign ? -1 : 1) * fraction * 10^(exponent - fraction_exp)
+ *
+ * to a double stored in `*result`.
+ *
+ * buf  start of the number text (including any sign); only used by the
+ *      strtod fallback, which also requires the text to be terminated
+ *      in a way strtod accepts.
+ * end  first character after the number text.
+ *
+ * Returns `end` when the number was converted natively. When the diy_fp
+ * conversion cannot guarantee correct rounding, strtod is used instead
+ * and the smaller of `end` and strtod's end pointer is returned.
+ */
+static const char *grisu3_encode_double(const char *buf, const char *end, int sign, uint64_t fraction, int exponent, int fraction_exp, int ulp_half_error, double *result)
+{
+    const int max_d_exp = GRISU3_D64_MAX_DEC_EXP;
+    const int min_d_exp = GRISU3_D64_MIN_DEC_EXP;
+
+    /* Power of ten that scales `fraction` to the actual value. */
+    int d_exp;
+    /* Decimal exponent of the most significant non-zero digit. */
+    int n_exp;
+    uint64_t t;
+    char *v_end;
+
+    if (fraction == 0) {
+        *result = 0.0;
+        goto done;
+    }
+
+    /*
+     * `exponent` is only a true normalized exponent when the input had
+     * no leading zeroes (e.g. not for 0.001), so derive the magnitude
+     * from the digits actually present in `fraction`.
+     */
+    d_exp = exponent - fraction_exp;
+    n_exp = d_exp;
+    for (t = fraction; t >= 10; t /= 10) {
+        ++n_exp;
+    }
+
+    /*
+     * Both for user experience, and to protect internal power table
+     * lookups: fraction has at most 20 digits, so with n_exp bounded
+     * below, d_exp stays within the cached power range.
+     */
+    if (n_exp < min_d_exp) {
+        *result = 0.0;
+        goto done;
+    }
+    if (n_exp - 1 > max_d_exp) {
+        *result = grisu3_double_infinity;
+        goto done;
+    }
+
+    /*
+     * If the fraction fits in 53 bits it converts to double exactly,
+     * and every power of ten up to 10^22 is also exact, so a single
+     * multiplication or division yields the correctly rounded result.
+     *
+     * ulp_half_error only happens when fraction is maxed out, which
+     * exceeds 53 bits, but it is tested anyway to keep this path exact.
+     *
+     * http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+     */
+#ifdef GRISU3_PARSE_FAST_CASE
+    if (!ulp_half_error && fraction < (1ULL << 53) && d_exp >= -22 && d_exp <= 22) {
+        static const double exact_pow10[] = {
+            1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
+            1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
+        };
+        double v = (double)fraction;
+
+        /* Multiplying by 1e-k instead of dividing by 1ek results in rounding error. */
+        if (d_exp < 0) {
+            v /= exact_pow10[-d_exp];
+        } else {
+            v *= exact_pow10[d_exp];
+        }
+        *result = v;
+        goto done;
+    }
+#endif
+
+    if (grisu3_diy_fp_encode_double(fraction, exponent, fraction_exp, ulp_half_error, result)) {
+        goto done;
+    }
+#ifdef GRISU3_PARSE_ALLOW_ERROR
+    goto done;
+#endif
+    /* strtod parses the sign itself, so the sign fix-up below is skipped. */
+    *result = strtod(buf, &v_end);
+    if (v_end < end) {
+        return v_end;
+    }
+    return end;
+done:
+    if (sign) {
+        *result = -*result;
+    }
+    return end;
+}
+
+/*
+ * Returns buf if number wasn't matched, or null if number starts ok
+ * but contains invalid content.
+ */
+/*
+ * Placeholder for hexadecimal floating point parsing, which is not
+ * currently supported: sets `*result` to 0.0 and returns `buf`
+ * unchanged.
+ */
+static const char *grisu3_parse_hex_fp(const char *buf, const char *end, int sign, double *result)
+{
+    /* Not currently supported. */
+    *result = 0.0;
+    (void)sign;
+    (void)end;
+    return buf;
+}
+
+/*
+ * Returns end pointer on success, or null, or buf if start is not a number.
+ * Sets result to 0.0 on error.
+ * Reads up to len + 1 bytes from buffer where len + 1 must not be a
+ * valid part of a number, but all of buf, buf + len need not be a
+ * number. Leading whitespace is NOT valid.
+ * Very small numbers are truncated to +/-0.0 and numerically very large
+ * numbers are returned as +/-infinity.
+ *
+ * A value must not end or begin with '.' (like JSON), but can have
+ * leading zeroes (unlike JSON). A single leading zero followed by
+ * an encoding symbol may or may not be interpreted as a non-decimal
+ * encoding prefix, e.g. 0x, but a leading zero followed by a digit is
+ * NOT interpreted as octal.
+ * A single leading negative sign may appear before digits, but positive
+ * sign is not allowed and space after the sign is not allowed.
+ * At most the first 1000 characters of the input is considered.
+ */
+/*
+ * Parses a decimal floating point number at `buf`.
+ *
+ * buf    start of the text; buf[len] must be readable and must not be
+ *        part of a number.
+ * len    number of characters that may belong to the number; at most
+ *        GRISU3_NUM_MAX_LEN characters are considered.
+ * result receives the parsed value, or 0.0 on error.
+ *
+ * Returns the position after the number on success, `buf` if the text
+ * does not start a number, and NULL if it starts like a number but is
+ * malformed or runs into the end of the buffer.
+ */
+static const char *grisu3_parse_double(const char *buf, size_t len, double *result)
+{
+    const char *mark, *k, *end;
+    int sign = 0, esign = 0;
+    uint64_t fraction = 0;
+    int exponent = 0;
+    int ee = 0;
+    int fraction_exp = 0;
+    int ulp_half_error = 0;
+
+    *result = 0.0;
+
+    end = buf + len + 1;
+
+    /* Failsafe for exponent overflow. */
+    if (len > GRISU3_NUM_MAX_LEN) {
+        end = buf + GRISU3_NUM_MAX_LEN + 1;
+    }
+
+    if (buf == end) {
+        return buf;
+    }
+    mark = buf;
+    if (*buf == '-') {
+        ++buf;
+        sign = 1;
+        if (buf == end) {
+            return NULL;
+        }
+    }
+    if (*buf == '0') {
+        ++buf;
+        /* | 0x20 is lower case ASCII. */
+        if (buf != end && (*buf | 0x20) == 'x') {
+            k = grisu3_parse_hex_fp(buf, end, sign, result);
+            if (k == buf) {
+                return mark;
+            }
+            return k;
+        }
+        /* Not worthwhile, except for getting the scale of integer part. */
+        while (buf != end && *buf == '0') {
+            ++buf;
+        }
+    } else {
+        if (*buf < '1' || *buf > '9') {
+            /*
+             * If we didn't see a sign, just don't recognize it as
+             * number, otherwise make it an error.
+             */
+            return sign ? NULL : mark;
+        }
+        fraction = (uint64_t)(*buf++ - '0');
+    }
+    k = buf;
+    /*
+     * We do not catch trailing zeroes when there is no decimal point.
+     * This misses an opportunity for moving the exponent down into the
+     * fast case. But it is unlikely to be worthwhile as it complicates
+     * parsing.
+     */
+    while (buf != end && *buf >= '0' && *buf <= '9') {
+        if (fraction >= UINT64_MAX / 10) {
+            /* No room for another digit: round on it and stop accumulating. */
+            fraction += *buf >= '5';
+            ulp_half_error = 1;
+            break;
+        }
+        fraction = fraction * 10 + (uint64_t)(*buf++ - '0');
+    }
+    fraction_exp = (int)(buf - k);
+    /*
+     * Skip surplus integer digits. Each one scales the value by ten.
+     * Trailing zero does not introduce error.
+     */
+    while (buf != end && *buf == '0') {
+        ++exponent;
+        ++buf;
+    }
+    if (buf != end && *buf >= '1' && *buf <= '9') {
+        ulp_half_error = 1;
+        ++exponent;
+        ++buf;
+        while (buf != end && *buf >= '0' && *buf <= '9') {
+            ++exponent;
+            ++buf;
+        }
+    }
+    if (buf != end && *buf == '.') {
+        ++buf;
+        k = buf;
+        if (buf == end || *buf < '0' || *buf > '9') {
+            /* We don't accept numbers without leading or trailing digit. */
+            return NULL;
+        }
+        while (buf != end && *buf >= '0' && *buf <= '9') {
+            if (fraction >= UINT64_MAX / 10) {
+                if (!ulp_half_error) {
+                    fraction += *buf >= '5';
+                    ulp_half_error = 1;
+                }
+                break;
+            }
+            fraction = fraction * 10 + (uint64_t)(*buf++ - '0');
+            --exponent;
+        }
+        fraction_exp += (int)(buf - k);
+        /*
+         * Skip surplus fractional digits. They are below the retained
+         * precision and, unlike surplus integer digits, do not scale
+         * the value, so the exponent is left alone.
+         */
+        while (buf != end && *buf == '0') {
+            ++buf;
+        }
+        if (buf != end && *buf >= '1' && *buf <= '9') {
+            ulp_half_error = 1;
+            ++buf;
+            while (buf != end && *buf >= '0' && *buf <= '9') {
+                ++buf;
+            }
+        }
+    }
+    /*
+     * Normalized exponent e.g: 1.23434e3 with fraction = 123434,
+     * fraction_exp = 5, exponent = 3.
+     * So value = fraction * 10^(exponent - fraction_exp)
+     */
+    exponent += fraction_exp;
+    if (buf != end && (*buf | 0x20) == 'e') {
+        if (end - buf < 2) {
+            return NULL;
+        }
+        ++buf;
+        if (*buf == '+') {
+            ++buf;
+            if (buf == end) {
+                return NULL;
+            }
+        } else if (*buf == '-') {
+            esign = 1;
+            ++buf;
+            if (buf == end) {
+                return NULL;
+            }
+        }
+        if (*buf < '0' || *buf > '9') {
+            return NULL;
+        }
+        ee = *buf++ - '0';
+        while (buf != end && *buf >= '0' && *buf <= '9') {
+            /*
+             * This test impacts performance and we do not need an
+             * exact value just one large enough to dominate the fraction_exp.
+             * Subsequent handling maps large absolute ee to 0 or infinity.
+             */
+            if (ee <= 0x7fff) {
+                ee = ee * 10 + *buf - '0';
+            }
+            ++buf;
+        }
+    }
+    exponent = exponent + (esign ? -ee : ee);
+
+    /*
+     * Exponent is now a base 10 normalized exponent so the absolute value
+     * is less than 10^(exponent + 1) for positive exponents. For
+     * denormalized doubles (using 11 bit exponent 0 with a fraction
+     * shifted down), extra small numbers can be achieved.
+     *
+     * https://en.wikipedia.org/wiki/Double-precision_floating-point_format
+     *
+     * 10^-324 holds the smallest normalized exponent (but not value) and
+     * 10^308 holds the largest exponent. Internally our lookup table is
+     * only safe to use within a range slightly larger than this.
+     * Externally, a slightly larger/smaller value represents NaNs which
+     * are technically also possible to store as a number.
+     *
+     */
+
+    /* This also protects strtod fallback parsing. */
+    if (buf == end) {
+        return NULL;
+    }
+    return grisu3_encode_double(mark, buf, sign, fraction, exponent, fraction_exp, ulp_half_error, result);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GRISU3_PARSE_H */
diff --git a/external/grisu3/grisu3_print.h b/external/grisu3/grisu3_print.h
new file mode 100644
index 0000000..d748408
--- /dev/null
+++ b/external/grisu3/grisu3_print.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2016 Mikkel F. Jørgensen, dvide.com
+ * Copyright author of MathGeoLib (https://github.com/juj)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+/*
+ * Extracted from MathGeoLib.
+ *
+ * mikkelfj:
+ * - Fixed final output when printing single digit negative exponent to
+ * have leading zero (important for JSON).
+ * - Changed formatting to prefer 0.012 over 1.2e-2.
+ *
+ * Large portions of the original grisu3.c file have been moved to
+ * grisu3_math.h, the rest is placed here.
+ *
+ * See also comments in grisu3_math.h.
+ *
+ * MathGeoLib grisu3.c comment:
+ *
+ * This file is part of an implementation of the "grisu3" double to string
+ * conversion algorithm described in the research paper
+ *
+ * "Printing Floating-Point Numbers Quickly And Accurately with Integers"
+ * by Florian Loitsch, available at
+ * http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
+ */
+
+#ifndef GRISU3_PRINT_H
+#define GRISU3_PRINT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <assert.h> /* assert */
+#include <stdio.h> /* sprintf, only needed for fallback printing */
+#include <string.h> /* memmove */
+
+#include "grisu3_math.h"
+
+/*
+ * The lightweight "portable" C library recognizes grisu3 support if
+ * included first.
+ */
+#define grisu3_print_double_is_defined 1
+
+/*
+ * Not sure we have an exact definition, but we get up to 23
+ * empirically. There is some math ensuring it does not go AWOL though,
+ * like 18 digits + exponent or so.
+ * This max should be a safe buffer size for printing, including the
+ * zero terminator.
+ */
+#define GRISU3_PRINT_MAX 30
+
+/*
+ * Adjusts the last generated digit and decides whether the digit string
+ * can be trusted (the "round and weed" step of digit generation).
+ *
+ * As called from grisu3_digit_gen:
+ * buffer, len: digits generated so far; only buffer[len-1] is modified.
+ * wp_W: too_high - w, the distance from the upper bound to w.
+ * delta: width of the unsafe interval.
+ * rest: remainder of too_high not yet covered by the generated digits.
+ * ten_kappa: weight of one unit of the last digit.
+ * ulp: the error unit, in the same scale as the other quantities.
+ *
+ * Returns nonzero if the (possibly adjusted) digits are accepted and 0
+ * if they are rejected.
+ */
+static int grisu3_round_weed(char *buffer, int len, uint64_t wp_W, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t ulp)
+{
+ /* wp_W shrunk and grown by one ulp; both variants are tested below. */
+ uint64_t wp_Wup = wp_W - ulp;
+ uint64_t wp_Wdown = wp_W + ulp;
+ /*
+ * Decrement the last digit while the step stays within delta and
+ * leaves rest at least as close to wp_Wup as it was before.
+ */
+ while(rest < wp_Wup && delta - rest >= ten_kappa
+ && (rest + ten_kappa < wp_Wup || wp_Wup - rest >= rest + ten_kappa - wp_Wup))
+ {
+ --buffer[len-1];
+ rest += ten_kappa;
+ }
+ /*
+ * If one more step would still be an improvement when measured
+ * against wp_Wdown, the last digit is ambiguous: reject.
+ */
+ if (rest < wp_Wdown && delta - rest >= ten_kappa
+ && (rest + ten_kappa < wp_Wdown || wp_Wdown - rest > rest + ten_kappa - wp_Wdown))
+ return 0;
+
+ /* Accept only when rest lies within [2*ulp, delta - 4*ulp]. */
+ return 2*ulp <= rest && rest <= delta - 4*ulp;
+}
+
+/*
+ * Generates decimal digits for w, given its scaled boundaries low and
+ * high, into buffer.
+ *
+ * The digits are taken from too_high (high widened by one unit) and
+ * generation stops as soon as the remainder drops below the width of
+ * the unsafe interval [too_low, too_high]; grisu3_round_weed then
+ * adjusts and validates the last digit.
+ *
+ * On return *length holds the number of digits written (buffer is not
+ * zero terminated) and *kappa has been decremented once per digit.
+ * Returns the result of grisu3_round_weed: nonzero if the digits are
+ * accepted, 0 otherwise.
+ */
+static int grisu3_digit_gen(grisu3_diy_fp_t low, grisu3_diy_fp_t w, grisu3_diy_fp_t high, char *buffer, int *length, int *kappa)
+{
+ uint64_t unit = 1;
+ grisu3_diy_fp_t too_low = { low.f - unit, low.e };
+ grisu3_diy_fp_t too_high = { high.f + unit, high.e };
+ grisu3_diy_fp_t unsafe_interval = grisu3_diy_fp_minus(too_high, too_low);
+ /* The value 1 in w's scale; splits too_high into integer and fraction bits. */
+ grisu3_diy_fp_t one = { 1ULL << -w.e, w.e };
+ uint32_t p1 = (uint32_t)(too_high.f >> -one.e);
+ uint64_t p2 = too_high.f & (one.f - 1);
+ uint32_t div;
+ /* NOTE(review): presumably stores the largest power of ten <= p1 in div and returns its digit count - confirm in grisu3_math.h. */
+ *kappa = grisu3_largest_pow10(p1, GRISU3_DIY_FP_FRACT_SIZE + one.e, &div);
+ *length = 0;
+
+ /* Digits of the integer part p1, most significant first. */
+ while(*kappa > 0)
+ {
+ uint64_t rest;
+ char digit = (char)(p1 / div);
+ buffer[*length] = '0' + digit;
+ ++*length;
+ p1 %= div;
+ --*kappa;
+ /* What remains of too_high after the digits emitted so far. */
+ rest = ((uint64_t)p1 << -one.e) + p2;
+ if (rest < unsafe_interval.f) return grisu3_round_weed(buffer, *length, grisu3_diy_fp_minus(too_high, w).f, unsafe_interval.f, rest, (uint64_t)div << -one.e, unit);
+ div /= 10;
+ }
+
+ /*
+ * Digits of the fractional part p2: scale by ten each round, and scale
+ * unit and the unsafe interval along with it.
+ */
+ for(;;)
+ {
+ char digit;
+ p2 *= 10;
+ unit *= 10;
+ unsafe_interval.f *= 10;
+ /* Integer division by one. */
+ digit = (char)(p2 >> -one.e);
+ buffer[*length] = '0' + digit;
+ ++*length;
+ p2 &= one.f - 1; /* Modulo by one. */
+ --*kappa;
+ if (p2 < unsafe_interval.f) return grisu3_round_weed(buffer, *length, grisu3_diy_fp_minus(too_high, w).f * unit, unsafe_interval.f, p2, one.f, unit);
+ }
+}
+
+/*
+ * Converts a strictly positive, finite double to a string of decimal
+ * digits plus a base 10 exponent.
+ *
+ * buffer receives *length digits without a zero terminator, and *d_exp
+ * receives the decimal exponent that applies to those digits read as an
+ * integer. Returns nonzero on success and 0 when the digits were
+ * rejected by grisu3_digit_gen, in which case the caller needs another
+ * conversion method.
+ */
+static int grisu3(double v, char *buffer, int *length, int *d_exp)
+{
+ int mk, kappa, success;
+ grisu3_diy_fp_t dfp = grisu3_cast_diy_fp_from_double(v);
+ grisu3_diy_fp_t w = grisu3_diy_fp_normalize(dfp);
+
+ /* normalize boundaries */
+ grisu3_diy_fp_t t = { (dfp.f << 1) + 1, dfp.e - 1 };
+ grisu3_diy_fp_t b_plus = grisu3_diy_fp_normalize(t);
+ grisu3_diy_fp_t b_minus;
+ grisu3_diy_fp_t c_mk; /* Cached power of ten: 10^-k */
+ uint64_t u64 = grisu3_cast_uint64_from_double(v);
+ assert(v > 0 && v <= 1.7976931348623157e308); /* Grisu only handles strictly positive finite numbers. */
+ if (!(u64 & GRISU3_D64_FRACT_MASK) && (u64 & GRISU3_D64_EXP_MASK) != 0) { b_minus.f = (dfp.f << 2) - 1; b_minus.e = dfp.e - 2;} /* lower boundary is closer? */
+ else { b_minus.f = (dfp.f << 1) - 1; b_minus.e = dfp.e - 1; }
+ /* Bring b_minus to the same exponent as b_plus. */
+ b_minus.f = b_minus.f << (b_minus.e - b_plus.e);
+ b_minus.e = b_plus.e;
+
+ mk = grisu3_diy_fp_cached_pow(GRISU3_MIN_TARGET_EXP - GRISU3_DIY_FP_FRACT_SIZE - w.e, &c_mk);
+
+ /* Scale the value and both boundaries by the same cached power of ten. */
+ w = grisu3_diy_fp_multiply(w, c_mk);
+ b_minus = grisu3_diy_fp_multiply(b_minus, c_mk);
+ b_plus = grisu3_diy_fp_multiply(b_plus, c_mk);
+
+ success = grisu3_digit_gen(b_minus, w, b_plus, buffer, length, &kappa);
+ *d_exp = kappa - mk;
+ return success;
+}
+
+/*
+ * Writes the decimal representation of val to str, followed by a zero
+ * terminator.
+ *
+ * str must have room for an optional '-', the digits and the
+ * terminator. Returns the number of characters written, including the
+ * sign but excluding the terminator.
+ */
+static int grisu3_i_to_str(int val, char *str)
+{
+    char *begin = str;
+    char *first, *last;
+    /* Work on the unsigned magnitude so that INT_MIN cannot overflow on negation. */
+    unsigned int mag = (unsigned int)val;
+
+    if (val < 0) {
+        *str++ = '-';
+        mag = 0u - mag;
+    }
+    first = str;
+
+    /* Emit digits least significant first; zero still yields one digit. */
+    do {
+        *str++ = (char)('0' + (int)(mag % 10u));
+        mag /= 10u;
+    } while (mag != 0u);
+    *str = '\0';
+
+    /* Reverse the digits in place; the sign, if any, stays in front. */
+    for (last = str - 1; first < last; ++first, --last) {
+        char ch = *first;
+        *first = *last;
+        *last = ch;
+    }
+
+    return (int)(str - begin);
+}
+
+/*
+ * Prints the bit pattern v as "NaN(" followed by its 16 upper case hex
+ * digits and ")", zero terminated.
+ *
+ * dst must have room for 22 bytes. Returns the length written excluding
+ * the terminator, which is always 21.
+ */
+static int grisu3_print_nan(uint64_t v, char *dst)
+{
+    /*
+     * Let the initializer size the table: a char[16] initialized from a
+     * 16 character literal leaves no room for the terminator, which is
+     * valid C but ill-formed C++, and this header carries extern "C"
+     * guards for C++ use.
+     */
+    static const char hexdigits[] = "0123456789ABCDEF";
+    int i;
+
+    dst[0] = 'N';
+    dst[1] = 'a';
+    dst[2] = 'N';
+    dst[3] = '(';
+    dst[20] = ')';
+    dst[21] = '\0';
+    dst += 4;
+    /* Fill the 16 hex digits from least to most significant nibble. */
+    for (i = 15; i >= 0; --i) {
+        dst[i] = hexdigits[v & 0x0F];
+        v >>= 4;
+    }
+    return 21;
+}
+
+/*
+ * Prints v to dst as a zero terminated string and returns its length
+ * excluding the terminator.
+ *
+ * NaN is printed as "NaN(<16 hex digits>)" without a sign, zero as "0"
+ * or "-0" and infinity as "inf" or "-inf". All other values are
+ * converted with grisu3 and then laid out in plain decimal or exponent
+ * notation; if grisu3 rejects the value, sprintf "%.17g" is used.
+ *
+ * dst must not be null and must be large enough for the output.
+ */
+static int grisu3_print_double(double v, char *dst)
+{
+ int d_exp, len, success, decimals, i;
+ uint64_t u64 = grisu3_cast_uint64_from_double(v);
+ char *s2 = dst;
+ assert(dst);
+
+ /* Prehandle NaNs */
+ if ((u64 << 1) > 0xFFE0000000000000ULL) return grisu3_print_nan(u64, dst);
+ /* Prehandle negative values. */
+ if ((u64 & GRISU3_D64_SIGN) != 0) { *s2++ = '-'; v = -v; u64 ^= GRISU3_D64_SIGN; }
+ /* Prehandle zero. */
+ if (!u64) { *s2++ = '0'; *s2 = '\0'; return (int)(s2 - dst); }
+ /* Prehandle infinity. */
+ if (u64 == GRISU3_D64_EXP_MASK) { *s2++ = 'i'; *s2++ = 'n'; *s2++ = 'f'; *s2 = '\0'; return (int)(s2 - dst); }
+
+ success = grisu3(v, s2, &len, &d_exp);
+ /* If grisu3 was not able to convert the number to a string, then use old sprintf (suboptimal). */
+ if (!success) return sprintf(s2, "%.17g", v) + (int)(s2 - dst);
+
+ /* We now have an integer string of form "151324135" and a base-10 exponent for that number. */
+ /* Next, decide the best presentation for that string by whether to use a decimal point, or the scientific exponent notation 'e'. */
+ /* We don't pick the absolute shortest representation, but pick a balance between readability and shortness, e.g. */
+ /* 1.545056189557677e-308 could be represented in a shorter form */
+ /* 1545056189557677e-323 but that would be somewhat unreadable. */
+ decimals = GRISU3_MIN(-d_exp, GRISU3_MAX(1, len-1));
+
+ /* mikkelfj:
+ * fix zero prefix .1 => 0.1, important for JSON export.
+ * prefer unscientific notation at same length:
+ * -1.2345e-4 over -0.00012345,
+ * -0.0012345 over -1.2345e-3
+ */
+ if (d_exp < 0 && (len + d_exp) > -3 && len <= -d_exp)
+ {
+ /* mikkelfj: fix zero prefix .1 => 0.1, and short exponents 1.3e-2 => 0.013. */
+ /* Move the digits right to make room for "0." and -d_exp - len leading zeros. */
+ memmove(s2 + 2 - d_exp - len, s2, (size_t)len);
+ s2[0] = '0';
+ s2[1] = '.';
+ for (i = 2; i < 2-d_exp-len; ++i) s2[i] = '0';
+ /* After the loop i equals the number of characters inserted in front of the digits. */
+ len += i;
+ }
+ else if (d_exp < 0 && len > 1) /* Add decimal point? */
+ {
+ /* Shift the last `decimals` digits one place right and insert the point before them. */
+ for(i = 0; i < decimals; ++i) s2[len-i] = s2[len-i-1];
+ s2[len++ - decimals] = '.';
+ d_exp += decimals;
+ /* Need scientific notation as well? */
+ if (d_exp != 0) { s2[len++] = 'e'; len += grisu3_i_to_str(d_exp, s2+len); }
+ }
+ /* Add scientific notation? */
+ else if (d_exp < 0 || d_exp > 2) { s2[len++] = 'e'; len += grisu3_i_to_str(d_exp, s2+len); }
+ /* Add zeroes instead of scientific notation? */
+ else if (d_exp > 0) { while(d_exp-- > 0) s2[len++] = '0'; }
+ s2[len] = '\0'; /* grisu3 doesn't null terminate, so ensure termination. */
+ return (int)(s2+len-dst);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GRISU3_PRINT_H */
diff --git a/external/grisu3/grisu3_test.c b/external/grisu3/grisu3_test.c
new file mode 100644
index 0000000..930e027
--- /dev/null
+++ b/external/grisu3/grisu3_test.c
@@ -0,0 +1,141 @@
+#include <inttypes.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "grisu3_parse.h"
+#include "grisu3_print.h"
+
+/*
+ * Makes the enclosing function return 1, after printing a diagnostic to
+ * stderr, when condition x is false. s describes the failed check.
+ *
+ * Must be expanded in a scope that provides buf (the input string),
+ * expect and v (doubles) and a, b (the uint64_t bit patterns compared).
+ * The xor of a and b is unsigned and printed after a "0x" prefix, so it
+ * is formatted as hexadecimal.
+ */
+#define TEST(x, s) do { \
+    if (!(x)) { \
+        fprintf(stderr, \
+            "fail: %s\n" \
+            " input: %s\n" \
+            " expected: %.17g\n" \
+            " got: %.17g\n" \
+            " binary xor: 0x%016"PRIx64"\n", \
+            s, buf, expect, v, (a ^ b)); \
+        return 1; \
+    } \
+    } while (0)
+
+/*
+ * Parses buf with grisu3_parse_double and compares the result with
+ * strtod on the same input.
+ *
+ * buf must be a zero terminated string. Returns 0 when all checks pass
+ * and 1 (through TEST, which also prints a diagnostic) otherwise.
+ */
+static int test_parse_double(const char *buf)
+{
+    const char *k, *end;
+    double v, expect;
+    uint64_t a = 0, b = 0;
+    int len = (int)strlen(buf);
+
+    end = buf + len;
+
+    expect = strtod(buf, 0);
+    /*
+     * len does not count the '\0', but the terminator directly follows
+     * the parsed bytes, which is what a strtod fallback needs to stop.
+     */
+    k = grisu3_parse_double(buf, len, &v);
+
+    /* Make sure we parsed and accepted everything. */
+    TEST(k == end, "didn't parse to end");
+
+    a = grisu3_cast_uint64_from_double(expect);
+    b = grisu3_cast_uint64_from_double(v);
+
+#ifdef GRISU3_PARSE_ALLOW_ERROR
+    /*
+     * Just where exponent wraps, this assumption will be incorrect.
+     * TODO: need next higher double function.
+     *
+     * a and b are unsigned, so compare the absolute difference: a - b
+     * alone wraps to a huge value whenever b is the larger one.
+     */
+    TEST((a > b ? a - b : b - a) <= 1, "binary representation differs by more than lsb");
+#else
+    /* Binary comparison should match. */
+    TEST(expect == v, "double representation differs");
+    TEST(a == b, "binary representation differs");
+#endif
+
+#if 0
+    /* This will print the test data also when correct. */
+    TEST(0, "test case passed, just debugging");
+#endif
+
+    return 0;
+}
+
+/*
+ * Runs every grisu3_parse_double test vector and returns the number of
+ * vectors that failed.
+ *
+ * grisu3_print_double is currently not tested here because it is a
+ * direct port of dtoa_grisu3 from grisu3.c, which presumably has been
+ * tested in MathGeoLib, whereas grisu3_parse_double is a new
+ * implementation.
+ */
+int test_suite()
+{
+    /* Vectors handed to the parser directly as string literals. */
+    static char *direct[] = {
+        "1.23434",
+        "1234.34",
+        "1234.34e4",
+        "1234.34e-4",
+        "1.23434E+4",
+        "3.2897984798741413E+194",
+        "-3.2897984798741413E-194"
+    };
+    /* Vectors that are first formatted into a local buffer. */
+    static const char *buffered[] = {
+        "3289798479874141.314124124128497098e109",
+        "3289798479874141.314124124128497098e209",
+        "-3289798479874141.314124124128497098e209",
+        "3289798479874141.314124124128497098e+209",
+        "-3289798479874141.314124124128497098e-209"
+    };
+    char buf[50];
+    int fail = 0;
+    size_t i;
+
+    for (i = 0; i < sizeof(direct) / sizeof(direct[0]); ++i) {
+        fail += test_parse_double(direct[i]);
+    }
+    for (i = 0; i < sizeof(buffered) / sizeof(buffered[0]); ++i) {
+        sprintf(buf, "%s", buffered[i]);
+        fail += test_parse_double(buf);
+    }
+
+    return fail;
+}
+
+/*
+ * Demonstrates grisu3_parse_double followed by grisu3_print_double on a
+ * fixed input. All output is written to stderr.
+ */
+void example()
+{
+    /* Initialized so the print step below is well defined even if parsing fails. */
+    double v = 0.0;
+    const char *buf = "1234.34e-4";
+    const char *x, *end;
+    char result_buf[50];
+    int len;
+
+    fprintf(stderr, "grisu3_parse_double example:\n parsing '%s' as double\n", buf);
+    /* A non-numeric terminator (e.g. '\0') is required to ensure strtod fallback is safe. */
+    len = (int)strlen(buf);
+    end = buf + len;
+    x = grisu3_parse_double(buf, len, &v);
+    if (x == 0) {
+        fprintf(stderr, "syntax or range error\n");
+    } else if (x == buf) {
+        fprintf(stderr, "parse double failed\n");
+    } else if (x != end) {
+        fprintf(stderr, "parse double did not read everything\n");
+    } else {
+        fprintf(stderr, "got: %.17g\n", v);
+    }
+    /*
+     * With the formatting rules in grisu3_print_double, a short negative
+     * exponent such as the one in this example is laid out in plain
+     * decimal notation ("0.123434") rather than as "1.23434e-1".
+     *
+     * That layout is not part of the grisu3 algorithm itself but a post
+     * formatting step in grisu3_print_double (originally dtoa_grisu).
+     */
+    fprintf(stderr, "grisu3_print_double example:\n printing %g\n", v);
+    grisu3_print_double(v, result_buf);
+    fprintf(stderr, "got: %s\n", result_buf);
+}
+
+/*
+ * Runs the example followed by the parse test suite.
+ * Exit status is 0 when every test passed and -1 otherwise.
+ */
+int main()
+{
+    example();
+    fprintf(stderr, "running tests\n");
+
+    if (test_suite() == 0) {
+        fprintf(stderr, "GRISU3 PARSE TEST PASSED\n");
+        return 0;
+    }
+
+    fprintf(stderr, "GRISU3 PARSE TEST FAILED\n");
+    return -1;
+}
diff --git a/external/grisu3/grisu3_test_dblcnv.c b/external/grisu3/grisu3_test_dblcnv.c
new file mode 100644
index 0000000..f0e98cc
--- /dev/null
+++ b/external/grisu3/grisu3_test_dblcnv.c
@@ -0,0 +1,482 @@
+/*
+ * Test cases from Googles Double Conversion Library
+ *
+ * https://github.com/google/double-conversion/blob/master/test/cctest/test-strtod.cc
+ *
+ * Added extra tests for grisu parse print roundtrip and negative sign.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "grisu3_print.h"
+#include "grisu3_parse.h"
+
+/*
+ * Opens a test function named test_<name> and declares the locals the
+ * helper macros of this file rely on: fail (failure count), id (the
+ * test name as a string), v (receives the parsed value), vector (the
+ * current mantissa string) and buf (scratch space for the formatted
+ * input).
+ */
+#define BEGIN_TEST(name) int test_ ## name() { \
+ int fail = 0; char *id = #name; double v; char *vector; \
+ char buf[1001];
+
+/* Closes a function opened with BEGIN_TEST and returns its failure count. */
+#define END_TEST() return fail; }
+
+
+/*
+ * Checks that x1 equals x2 and, if so, that x1 survives being printed
+ * with grisu3_print_double and parsed back with grisu3_parse_double.
+ *
+ * id and line identify the check in diagnostics, which are written to
+ * stderr. *fail is incremented by one if any of the checks fails.
+ */
+void check_double(double x1, double x2, char *id, int line, int *fail)
+{
+    char tmp[50];
+    const char *k;
+    int n;
+    double v;
+
+    if (x1 != x2) {
+        fprintf(stderr, "%d: fail (%s): %.17g != %.17g\n",
+                line, id, x1, x2);
+        ++*fail;
+        return;
+    }
+
+    n = grisu3_print_double(x1, tmp);
+    if (n >= GRISU3_PRINT_MAX) { /* Leave space for zterm. */
+        fprintf(stderr, "%d: fail (%s): print length exceeded max: %d, input: %.17g\n",
+                line, id, n, x1);
+        ++*fail;
+        return;
+    }
+    if ((int)strlen(tmp) != n) {
+        fprintf(stderr, "%d: fail (%s): print length does not match strlen of output, input: %.17g, got: %s\n",
+                line, id, x1, tmp);
+        ++*fail;
+        return;
+    }
+
+    /* Infinity is excluded from the roundtrip: it is printed but not parsed back. */
+    if (isinf(x1)) {
+        return;
+    }
+
+    k = grisu3_parse_double(tmp, n, &v);
+    if (k == 0 || k == tmp) {
+        fprintf(stderr, "%d: fail (%s): roundtrip parse failed "
+                "input: %g, printed value %s\n",
+                line, id, x1, tmp);
+        ++*fail;
+    } else if (x1 != v) {
+        fprintf(stderr, "%d: fail (%s): print/parse roundtrip mismatch for "
+                "input: %.17g, got %.17g\n",
+                line, id, x1, v);
+        ++*fail;
+    }
+}
+
+/* Compares two doubles through check_double, using the id and fail locals of the enclosing test. */
+#define CHECK_EQ(v1, v2) check_double((v1), (v2), id, __LINE__, &fail)
+
+/* Identity mapping; keeps the test vectors textually close to the upstream tests. */
+#define StringToVector(f) f
+
+/*
+ * Formats "<f>e<e>" into buf, parses it into v and evaluates to v.
+ * NOTE(review): the return value of grisu3_parse_double is ignored, so
+ * a rejected input evaluates to whatever v holds afterwards - confirm.
+ */
+#define Strtod(f, e) (sprintf(buf, "%se%d", f, e), \
+ grisu3_parse_double(buf, strlen(buf), &v), v)
+
+/* Same expansion as Strtod. */
+#define StrtodChar(f, e) (sprintf(buf, "%se%d", f, e), \
+ grisu3_parse_double(buf, strlen(buf), &v), v)
+
+/* Maps the name used by the test vectors onto the grisu3 name. */
+#define double_infinity grisu3_double_infinity
+
+/*
+ * Parse test vectors. Each CHECK_EQ compares a double literal with the
+ * value parsed from "<digits>e<exponent>", and check_double then also
+ * roundtrips the value through print and parse.
+ *
+ * NOTE(review): Strtod/StrtodChar ignore the return value of
+ * grisu3_parse_double, so for an input the parser rejects (possibly the
+ * empty mantissa case below) the compared value is whatever `v` holds
+ * afterwards - confirm that this is intended.
+ */
+BEGIN_TEST(Strtod)
+ vector = StringToVector("0");
+ CHECK_EQ(0.0, Strtod(vector, 1));
+ CHECK_EQ(0.0, Strtod(vector, 2));
+ CHECK_EQ(0.0, Strtod(vector, -2));
+ CHECK_EQ(0.0, Strtod(vector, -999));
+ CHECK_EQ(0.0, Strtod(vector, +999));
+
+ vector = StringToVector("1");
+ CHECK_EQ(1.0, Strtod(vector, 0));
+ CHECK_EQ(10.0, Strtod(vector, 1));
+ CHECK_EQ(100.0, Strtod(vector, 2));
+ CHECK_EQ(1e20, Strtod(vector, 20));
+ CHECK_EQ(1e22, Strtod(vector, 22));
+ CHECK_EQ(1e23, Strtod(vector, 23));
+
+ CHECK_EQ(1e35, Strtod(vector, 35));
+ CHECK_EQ(1e36, Strtod(vector, 36));
+ CHECK_EQ(1e37, Strtod(vector, 37));
+ CHECK_EQ(1e-1, Strtod(vector, -1));
+ CHECK_EQ(1e-2, Strtod(vector, -2));
+ CHECK_EQ(1e-5, Strtod(vector, -5));
+ CHECK_EQ(1e-20, Strtod(vector, -20));
+ CHECK_EQ(1e-22, Strtod(vector, -22));
+ CHECK_EQ(1e-23, Strtod(vector, -23));
+ CHECK_EQ(1e-25, Strtod(vector, -25));
+ CHECK_EQ(1e-39, Strtod(vector, -39));
+
+ vector = StringToVector("2");
+ CHECK_EQ(2.0, Strtod(vector, 0));
+ CHECK_EQ(20.0, Strtod(vector, 1));
+ CHECK_EQ(200.0, Strtod(vector, 2));
+ CHECK_EQ(2e20, Strtod(vector, 20));
+ CHECK_EQ(2e22, Strtod(vector, 22));
+ CHECK_EQ(2e23, Strtod(vector, 23));
+ CHECK_EQ(2e35, Strtod(vector, 35));
+ CHECK_EQ(2e36, Strtod(vector, 36));
+ CHECK_EQ(2e37, Strtod(vector, 37));
+ CHECK_EQ(2e-1, Strtod(vector, -1));
+ CHECK_EQ(2e-2, Strtod(vector, -2));
+ CHECK_EQ(2e-5, Strtod(vector, -5));
+ CHECK_EQ(2e-20, Strtod(vector, -20));
+ CHECK_EQ(2e-22, Strtod(vector, -22));
+ CHECK_EQ(2e-23, Strtod(vector, -23));
+ CHECK_EQ(2e-25, Strtod(vector, -25));
+ CHECK_EQ(2e-39, Strtod(vector, -39));
+
+ vector = StringToVector("9");
+ CHECK_EQ(9.0, Strtod(vector, 0));
+ CHECK_EQ(90.0, Strtod(vector, 1));
+ CHECK_EQ(900.0, Strtod(vector, 2));
+ CHECK_EQ(9e20, Strtod(vector, 20));
+ CHECK_EQ(9e22, Strtod(vector, 22));
+ CHECK_EQ(9e23, Strtod(vector, 23));
+ CHECK_EQ(9e35, Strtod(vector, 35));
+ CHECK_EQ(9e36, Strtod(vector, 36));
+ CHECK_EQ(9e37, Strtod(vector, 37));
+ CHECK_EQ(9e-1, Strtod(vector, -1));
+ CHECK_EQ(9e-2, Strtod(vector, -2));
+ CHECK_EQ(9e-5, Strtod(vector, -5));
+ CHECK_EQ(9e-20, Strtod(vector, -20));
+ CHECK_EQ(9e-22, Strtod(vector, -22));
+ CHECK_EQ(9e-23, Strtod(vector, -23));
+ CHECK_EQ(9e-25, Strtod(vector, -25));
+ CHECK_EQ(9e-39, Strtod(vector, -39));
+
+ vector = StringToVector("12345");
+ CHECK_EQ(12345.0, Strtod(vector, 0));
+ CHECK_EQ(123450.0, Strtod(vector, 1));
+ CHECK_EQ(1234500.0, Strtod(vector, 2));
+ CHECK_EQ(12345e20, Strtod(vector, 20));
+ CHECK_EQ(12345e22, Strtod(vector, 22));
+ CHECK_EQ(12345e23, Strtod(vector, 23));
+ CHECK_EQ(12345e30, Strtod(vector, 30));
+ CHECK_EQ(12345e31, Strtod(vector, 31));
+ CHECK_EQ(12345e32, Strtod(vector, 32));
+ CHECK_EQ(12345e35, Strtod(vector, 35));
+ CHECK_EQ(12345e36, Strtod(vector, 36));
+ CHECK_EQ(12345e37, Strtod(vector, 37));
+ CHECK_EQ(12345e-1, Strtod(vector, -1));
+ CHECK_EQ(12345e-2, Strtod(vector, -2));
+ CHECK_EQ(12345e-5, Strtod(vector, -5));
+ CHECK_EQ(12345e-20, Strtod(vector, -20));
+ CHECK_EQ(12345e-22, Strtod(vector, -22));
+ CHECK_EQ(12345e-23, Strtod(vector, -23));
+ CHECK_EQ(12345e-25, Strtod(vector, -25));
+ CHECK_EQ(12345e-39, Strtod(vector, -39));
+
+ vector = StringToVector("12345678901234");
+ CHECK_EQ(12345678901234.0, Strtod(vector, 0));
+ CHECK_EQ(123456789012340.0, Strtod(vector, 1));
+ CHECK_EQ(1234567890123400.0, Strtod(vector, 2));
+ CHECK_EQ(12345678901234e20, Strtod(vector, 20));
+ CHECK_EQ(12345678901234e22, Strtod(vector, 22));
+ CHECK_EQ(12345678901234e23, Strtod(vector, 23));
+ CHECK_EQ(12345678901234e30, Strtod(vector, 30));
+ CHECK_EQ(12345678901234e31, Strtod(vector, 31));
+ CHECK_EQ(12345678901234e32, Strtod(vector, 32));
+ CHECK_EQ(12345678901234e35, Strtod(vector, 35));
+ CHECK_EQ(12345678901234e36, Strtod(vector, 36));
+ CHECK_EQ(12345678901234e37, Strtod(vector, 37));
+ CHECK_EQ(12345678901234e-1, Strtod(vector, -1));
+ CHECK_EQ(12345678901234e-2, Strtod(vector, -2));
+ CHECK_EQ(12345678901234e-5, Strtod(vector, -5));
+ CHECK_EQ(12345678901234e-20, Strtod(vector, -20));
+ CHECK_EQ(12345678901234e-22, Strtod(vector, -22));
+ CHECK_EQ(12345678901234e-23, Strtod(vector, -23));
+ CHECK_EQ(12345678901234e-25, Strtod(vector, -25));
+ CHECK_EQ(12345678901234e-39, Strtod(vector, -39));
+
+ vector = StringToVector("123456789012345");
+ CHECK_EQ(123456789012345.0, Strtod(vector, 0));
+ CHECK_EQ(1234567890123450.0, Strtod(vector, 1));
+ CHECK_EQ(12345678901234500.0, Strtod(vector, 2));
+ CHECK_EQ(123456789012345e20, Strtod(vector, 20));
+ CHECK_EQ(123456789012345e22, Strtod(vector, 22));
+ CHECK_EQ(123456789012345e23, Strtod(vector, 23));
+ CHECK_EQ(123456789012345e35, Strtod(vector, 35));
+ CHECK_EQ(123456789012345e36, Strtod(vector, 36));
+ CHECK_EQ(123456789012345e37, Strtod(vector, 37));
+ CHECK_EQ(123456789012345e39, Strtod(vector, 39));
+ CHECK_EQ(123456789012345e-1, Strtod(vector, -1));
+ CHECK_EQ(123456789012345e-2, Strtod(vector, -2));
+ CHECK_EQ(123456789012345e-5, Strtod(vector, -5));
+ CHECK_EQ(123456789012345e-20, Strtod(vector, -20));
+ CHECK_EQ(123456789012345e-22, Strtod(vector, -22));
+ CHECK_EQ(123456789012345e-23, Strtod(vector, -23));
+ CHECK_EQ(123456789012345e-25, Strtod(vector, -25));
+ CHECK_EQ(123456789012345e-39, Strtod(vector, -39));
+ CHECK_EQ(0.0, StrtodChar("0", 12345));
+
+ CHECK_EQ(0.0, StrtodChar("", 1324));
+ CHECK_EQ(0.0, StrtodChar("000000000", 123));
+ CHECK_EQ(0.0, StrtodChar("2", -324));
+ CHECK_EQ(4e-324, StrtodChar("3", -324));
+
+ // It would be more readable to put non-zero literals on the left side (i.e.
+ // CHECK_EQ(1e-325, StrtodChar("1", -325))), but then Gcc complains that
+ // they are truncated to zero.
+ CHECK_EQ(0.0, StrtodChar("1", -325));
+ CHECK_EQ(0.0, StrtodChar("1", -325));
+ CHECK_EQ(0.0, StrtodChar("20000", -328));
+ CHECK_EQ(40000e-328, StrtodChar("30000", -328));
+ CHECK_EQ(0.0, StrtodChar("10000", -329));
+ CHECK_EQ(0.0, StrtodChar("90000", -329));
+ CHECK_EQ(0.0, StrtodChar("000000001", -325));
+ CHECK_EQ(0.0, StrtodChar("000000001", -325));
+ CHECK_EQ(0.0, StrtodChar("0000000020000", -328));
+ CHECK_EQ(40000e-328, StrtodChar("00000030000", -328));
+ CHECK_EQ(0.0, StrtodChar("0000000010000", -329));
+ CHECK_EQ(0.0, StrtodChar("0000000090000", -329));
+
+
+ // It would be more readable to put the literals (and not double_infinity)
+ // on the left side (i.e. CHECK_EQ(1e309, StrtodChar("1", 309))), but then Gcc
+ // complains that the floating constant exceeds range of 'double'.
+
+ CHECK_EQ(double_infinity, StrtodChar("1", 309));
+
+ CHECK_EQ(1e308, StrtodChar("1", 308));
+ CHECK_EQ(1234e305, StrtodChar("1234", 305));
+ CHECK_EQ(1234e304, StrtodChar("1234", 304));
+
+ CHECK_EQ(double_infinity, StrtodChar("18", 307));
+ CHECK_EQ(17e307, StrtodChar("17", 307));
+
+ CHECK_EQ(double_infinity, StrtodChar("0000001", 309));
+
+ CHECK_EQ(1e308, StrtodChar("00000001", 308));
+
+ CHECK_EQ(1234e305, StrtodChar("00000001234", 305));
+ CHECK_EQ(1234e304, StrtodChar("000000001234", 304));
+ CHECK_EQ(double_infinity, StrtodChar("0000000018", 307));
+ CHECK_EQ(17e307, StrtodChar("0000000017", 307));
+ CHECK_EQ(double_infinity, StrtodChar("1000000", 303));
+ CHECK_EQ(1e308, StrtodChar("100000", 303));
+ CHECK_EQ(1234e305, StrtodChar("123400000", 300));
+ CHECK_EQ(1234e304, StrtodChar("123400000", 299));
+ CHECK_EQ(double_infinity, StrtodChar("180000000", 300));
+ CHECK_EQ(17e307, StrtodChar("170000000", 300));
+ CHECK_EQ(double_infinity, StrtodChar("00000001000000", 303));
+ CHECK_EQ(1e308, StrtodChar("000000000000100000", 303));
+ CHECK_EQ(1234e305, StrtodChar("00000000123400000", 300));
+ CHECK_EQ(1234e304, StrtodChar("0000000123400000", 299));
+ CHECK_EQ(double_infinity, StrtodChar("00000000180000000", 300));
+ CHECK_EQ(17e307, StrtodChar("00000000170000000", 300));
+ CHECK_EQ(1.7976931348623157E+308, StrtodChar("17976931348623157", 292));
+ CHECK_EQ(1.7976931348623158E+308, StrtodChar("17976931348623158", 292));
+ CHECK_EQ(double_infinity, StrtodChar("17976931348623159", 292));
+
+ // The following number is the result of 89255.0/1e-22. Both floating-point
+ // numbers can be accurately represented with doubles. However on Linux,x86
+ // the floating-point stack is set to 80bits and the double-rounding
+ // introduces an error.
+ CHECK_EQ(89255e-22, StrtodChar("89255", -22));
+
+ // Some random values.
+ CHECK_EQ(358416272e-33, StrtodChar("358416272", -33));
+ CHECK_EQ(104110013277974872254e-225,
+ StrtodChar("104110013277974872254", -225));
+
+ CHECK_EQ(123456789e108, StrtodChar("123456789", 108));
+ CHECK_EQ(123456789e109, StrtodChar("123456789", 109));
+ CHECK_EQ(123456789e110, StrtodChar("123456789", 110));
+ CHECK_EQ(123456789e111, StrtodChar("123456789", 111));
+ CHECK_EQ(123456789e112, StrtodChar("123456789", 112));
+ CHECK_EQ(123456789e113, StrtodChar("123456789", 113));
+ CHECK_EQ(123456789e114, StrtodChar("123456789", 114));
+ CHECK_EQ(123456789e115, StrtodChar("123456789", 115));
+
+ CHECK_EQ(1234567890123456789012345e108,
+ StrtodChar("1234567890123456789012345", 108));
+ CHECK_EQ(1234567890123456789012345e109,
+ StrtodChar("1234567890123456789012345", 109));
+ CHECK_EQ(1234567890123456789012345e110,
+ StrtodChar("1234567890123456789012345", 110));
+ CHECK_EQ(1234567890123456789012345e111,
+ StrtodChar("1234567890123456789012345", 111));
+ CHECK_EQ(1234567890123456789012345e112,
+ StrtodChar("1234567890123456789012345", 112));
+ CHECK_EQ(1234567890123456789012345e113,
+ StrtodChar("1234567890123456789012345", 113));
+ CHECK_EQ(1234567890123456789012345e114,
+ StrtodChar("1234567890123456789012345", 114));
+ CHECK_EQ(1234567890123456789012345e115,
+ StrtodChar("1234567890123456789012345", 115));
+ CHECK_EQ(1234567890123456789052345e108,
+ StrtodChar("1234567890123456789052345", 108));
+ CHECK_EQ(1234567890123456789052345e109,
+ StrtodChar("1234567890123456789052345", 109));
+ CHECK_EQ(1234567890123456789052345e110,
+ StrtodChar("1234567890123456789052345", 110));
+ CHECK_EQ(1234567890123456789052345e111,
+ StrtodChar("1234567890123456789052345", 111));
+ CHECK_EQ(1234567890123456789052345e112,
+ StrtodChar("1234567890123456789052345", 112));
+ CHECK_EQ(1234567890123456789052345e113,
+ StrtodChar("1234567890123456789052345", 113));
+ CHECK_EQ(1234567890123456789052345e114,
+ StrtodChar("1234567890123456789052345", 114));
+ CHECK_EQ(1234567890123456789052345e115,
+ StrtodChar("1234567890123456789052345", 115));
+ CHECK_EQ(5.445618932859895e-255,
+ StrtodChar("5445618932859895362967233318697132813618813095743952975"
+ "4392982234069699615600475529427176366709107287468930197"
+ "8628345413991790019316974825934906752493984055268219809"
+ "5012176093045431437495773903922425632551857520884625114"
+ "6241265881735209066709685420744388526014389929047617597"
+ "0302268848374508109029268898695825171158085457567481507"
+ "4162979705098246243690189880319928315307816832576838178"
+ "2563074014542859888710209237525873301724479666744537857"
+ "9026553346649664045621387124193095870305991178772256504"
+ "4368663670643970181259143319016472430928902201239474588"
+ "1392338901353291306607057623202353588698746085415097902"
+ "6640064319118728664842287477491068264828851624402189317"
+ "2769161449825765517353755844373640588822904791244190695"
+ "2998382932630754670573838138825217065450843010498555058"
+ "88186560731", -1035));
+
+ // Boundary cases. Boundaries themselves should round to even.
+ //
+ // 0x1FFFFFFFFFFFF * 2^3 = 72057594037927928
+ // next: 72057594037927936
+ // boundary: 72057594037927932 should round up.
+ CHECK_EQ(72057594037927928.0, StrtodChar("72057594037927928", 0));
+ CHECK_EQ(72057594037927936.0, StrtodChar("72057594037927936", 0));
+ CHECK_EQ(72057594037927936.0, StrtodChar("72057594037927932", 0));
+ CHECK_EQ(72057594037927928.0, StrtodChar("7205759403792793199999", -5));
+ CHECK_EQ(72057594037927936.0, StrtodChar("7205759403792793200001", -5));
+
+ // 0x1FFFFFFFFFFFF * 2^10 = 9223372036854774784
+ // next: 9223372036854775808
+ // boundary: 9223372036854775296 should round up.
+ CHECK_EQ(9223372036854774784.0, StrtodChar("9223372036854774784", 0));
+ CHECK_EQ(9223372036854775808.0, StrtodChar("9223372036854775808", 0));
+ CHECK_EQ(9223372036854775808.0, StrtodChar("9223372036854775296", 0));
+
+ CHECK_EQ(9223372036854774784.0, StrtodChar("922337203685477529599999", -5));
+ CHECK_EQ(9223372036854775808.0, StrtodChar("922337203685477529600001", -5));
+
+ // 0x1FFFFFFFFFFFF * 2^50 = 10141204801825834086073718800384
+ // next: 10141204801825835211973625643008
+ // boundary: 10141204801825834649023672221696 should round up.
+ //
+ CHECK_EQ(10141204801825834086073718800384.0,
+ StrtodChar("10141204801825834086073718800384", 0));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodChar("10141204801825835211973625643008", 0));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodChar("10141204801825834649023672221696", 0));
+ CHECK_EQ(10141204801825834086073718800384.0,
+ StrtodChar("1014120480182583464902367222169599999", -5));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodChar("1014120480182583464902367222169600001", -5));
+ // 0x1FFFFFFFFFFFF * 2^99 = 5708990770823838890407843763683279797179383808
+ // next: 5708990770823839524233143877797980545530986496
+ // boundary: 5708990770823839207320493820740630171355185152
+ // The boundary should round up.
+ CHECK_EQ(5708990770823838890407843763683279797179383808.0,
+ StrtodChar("5708990770823838890407843763683279797179383808", 0));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodChar("5708990770823839524233143877797980545530986496", 0));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodChar("5708990770823839207320493820740630171355185152", 0));
+ CHECK_EQ(5708990770823838890407843763683279797179383808.0,
+ StrtodChar("5708990770823839207320493820740630171355185151999", -3));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodChar("5708990770823839207320493820740630171355185152001", -3));
+
+ // The following test-cases got some public attention in early 2011 when they
+ // sent Java and PHP into an infinite loop.
+ CHECK_EQ(2.225073858507201e-308, StrtodChar("22250738585072011", -324));
+ CHECK_EQ(2.22507385850720138309e-308,
+ StrtodChar("22250738585072011360574097967091319759348195463516456480"
+ "23426109724822222021076945516529523908135087914149158913"
+ "03962110687008643869459464552765720740782062174337998814"
+ "10632673292535522868813721490129811224514518898490572223"
+ "07285255133155755015914397476397983411801999323962548289"
+ "01710708185069063066665599493827577257201576306269066333"
+ "26475653000092458883164330377797918696120494973903778297"
+ "04905051080609940730262937128958950003583799967207254304"
+ "36028407889577179615094551674824347103070260914462157228"
+ "98802581825451803257070188608721131280795122334262883686"
+ "22321503775666622503982534335974568884423900265498198385"
+ "48794829220689472168983109969836584681402285424333066033"
+ "98508864458040010349339704275671864433837704860378616227"
+ "71738545623065874679014086723327636718751", -1076));
+END_TEST()
+
+
+/*
+ * Non-google test.
+ *
+ * Short mantissas with negative signs and small exponents. Through
+ * CHECK_EQ and check_double every value is also printed with
+ * grisu3_print_double and parsed back, so these vectors exercise the
+ * printer's layout for signs, decimal points and exponents.
+ */
+BEGIN_TEST(grisu3_print_double)
+ vector = "13";
+ CHECK_EQ(13e-2, Strtod(vector, -2));
+ CHECK_EQ(13e-3, Strtod(vector, -3));
+
+ vector = "-13";
+ CHECK_EQ(-13e-2, Strtod(vector, -2));
+ CHECK_EQ(-13e-3, Strtod(vector, -3));
+ vector = "-1";
+ CHECK_EQ(-1e-2, Strtod(vector, -2));
+ CHECK_EQ(-1e-3, Strtod(vector, -3));
+
+ CHECK_EQ(-1e1, StrtodChar("-1", 1));
+ CHECK_EQ(-1e+1, StrtodChar("-1", 1));
+ CHECK_EQ(-1e-0, StrtodChar("-1", -0));
+ CHECK_EQ(-1e-1, StrtodChar("-1", -1));
+ CHECK_EQ(-1e-2, StrtodChar("-1", -2));
+ CHECK_EQ(-1e-3, StrtodChar("-1", -3));
+ CHECK_EQ(-1e-4, StrtodChar("-1", -4));
+
+ CHECK_EQ(-12e1, StrtodChar("-12", 1));
+ CHECK_EQ(-12e+1, StrtodChar("-12", 1));
+ CHECK_EQ(-12e-0, StrtodChar("-12", -0));
+ CHECK_EQ(-12e-1, StrtodChar("-12", -1));
+ CHECK_EQ(-12e-2, StrtodChar("-12", -2));
+ CHECK_EQ(-12e-3, StrtodChar("-12", -3));
+ CHECK_EQ(-12e-4, StrtodChar("-12", -4));
+
+ CHECK_EQ(-123e1, StrtodChar("-123", 1));
+ CHECK_EQ(-123e+1, StrtodChar("-123", 1));
+ CHECK_EQ(-123e-0, StrtodChar("-123", -0));
+ CHECK_EQ(-123e-1, StrtodChar("-123", -1));
+ CHECK_EQ(-123e-2, StrtodChar("-123", -2));
+ CHECK_EQ(-123e-3, StrtodChar("-123", -3));
+ CHECK_EQ(-123e-4, StrtodChar("-123", -4));
+
+ CHECK_EQ(-1234e1, StrtodChar("-1234", 1));
+ CHECK_EQ(-1234e+1, StrtodChar("-1234", 1));
+ CHECK_EQ(-1234e-0, StrtodChar("-1234", -0));
+ CHECK_EQ(-1234e-1, StrtodChar("-1234", -1));
+ CHECK_EQ(-1234e-2, StrtodChar("-1234", -2));
+ CHECK_EQ(-1234e-3, StrtodChar("-1234", -3));
+ CHECK_EQ(-1234e-4, StrtodChar("-1234", -4));
+
+ CHECK_EQ(-12345e1, StrtodChar("-12345", 1));
+ CHECK_EQ(-12345e+1, StrtodChar("-12345", 1));
+ CHECK_EQ(-12345e-0, StrtodChar("-12345", -0));
+ CHECK_EQ(-12345e-1, StrtodChar("-12345", -1));
+ CHECK_EQ(-12345e-2, StrtodChar("-12345", -2));
+ CHECK_EQ(-12345e-3, StrtodChar("-12345", -3));
+ CHECK_EQ(-12345e-4, StrtodChar("-12345", -4));
+
+ CHECK_EQ(-12345e-5, StrtodChar("-12345", -5));
+ CHECK_EQ(-12345e-6, StrtodChar("-12345", -6));
+ CHECK_EQ(-12345e-7, StrtodChar("-12345", -7));
+ CHECK_EQ(-12345e-8, StrtodChar("-12345", -8));
+ CHECK_EQ(-12345e-9, StrtodChar("-12345", -9));
+ CHECK_EQ(-12345e-10, StrtodChar("-12345", -10));
+END_TEST()
+
+/*
+ * Runs both test groups in order and reports the overall outcome on
+ * stderr. Exit status is 0 on success and -1 if any check failed.
+ */
+int main()
+{
+    int failures = test_Strtod();
+
+    failures += test_grisu3_print_double();
+
+    if (failures == 0) {
+        fprintf(stderr, "SUCCESS\n");
+        return 0;
+    }
+
+    fprintf(stderr, "FAILURE\n");
+    return -1;
+}
diff --git a/external/grisu3/test.sh b/external/grisu3/test.sh
new file mode 100755
index 0000000..1794fbb
--- /dev/null
+++ b/external/grisu3/test.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+set -e
+
+cd $(dirname $0)
+mkdir -p build
+
+CC=cc
+
+$CC -g -Wall -Wextra $INCLUDE -I.. grisu3_test.c -lm -o build/grisu3_test_d
+$CC -DNDEBUG -Wall -Wextra -O2 $INCLUDE -I.. grisu3_test.c -lm -o build/grisu3_test
+echo "DEBUG:"
+build/grisu3_test_d
+echo "OPTIMIZED:"
+build/grisu3_test
+
+echo "running double conversion tests"
+./test_dblcnv.sh
diff --git a/external/grisu3/test_dblcnv.sh b/external/grisu3/test_dblcnv.sh
new file mode 100755
index 0000000..89f58f4
--- /dev/null
+++ b/external/grisu3/test_dblcnv.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+cd $(dirname $0)
+mkdir -p build
+
+CC=cc
+
+$CC -g -Wall -Wextra $INCLUDE -I.. grisu3_test_dblcnv.c -o build/grisu3_test_dblcnv_d
+$CC -DNDEBUG -Wall -Wextra -O2 $INCLUDE -I.. grisu3_test_dblcnv.c -o build/grisu3_test_dblcnv
+echo "DEBUG:"
+build/grisu3_test_dblcnv_d
+echo "OPTIMIZED:"
+build/grisu3_test_dblcnv
diff --git a/external/hash/.gitignore b/external/hash/.gitignore
new file mode 100644
index 0000000..a007fea
--- /dev/null
+++ b/external/hash/.gitignore
@@ -0,0 +1 @@
+build/*
diff --git a/external/hash/CMakeLists.txt b/external/hash/CMakeLists.txt
new file mode 100644
index 0000000..7b7d990
--- /dev/null
+++ b/external/hash/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required (VERSION 3.0.2)
+
+project (HashTest)
+
+SET(CMAKE_C_FLAGS_DEBUG "-g")
+SET(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+add_executable (hash_test hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c cmetrohash64.c)
+add_executable (hash_test_32 hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c PMurHash.c)
+add_executable (hash_test_rh hash_test.c str_set.c token_map.c ht32.c ht64.c ht32rh.c ht64rh.c cmetrohash64.c)
+
+target_compile_definitions(hash_test_32 PRIVATE
+ -DHT_HASH_32)
+target_compile_definitions(hash_test_rh PRIVATE
+ -DSTR_SET_RH -DTOKEN_MAP_RH)
+
+add_executable (load_test load_test.c ptr_set.c)
+# robin hood hash table
+add_executable (load_test_rh load_test.c ptr_set.c)
+
+target_compile_definitions(load_test PRIVATE
+ -DPTR_SET_INT_HASH)
+target_compile_definitions(load_test_rh PRIVATE
+ -DPTR_SET_RH -DPTR_SET_INT_HASH)
+
+# default hash function (links cmetrohash64.c); the _rh variant selects the robin hood table
+add_executable (load_test_d load_test.c ptr_set.c cmetrohash64.c)
+add_executable (load_test_d_rh load_test.c ptr_set.c cmetrohash64.c)
+target_compile_definitions(load_test_d_rh PRIVATE
+ -DPTR_SET_RH)
+
+add_test(hash_test hash_test)
+add_test(hash_test_32 hash_test_32)
+add_test(hash_test_rh hash_test_rh)
+add_test(load_test load_test)
+add_test(load_test_rh load_test_rh)
+
+enable_testing()
diff --git a/external/hash/LICENSE b/external/hash/LICENSE
new file mode 100644
index 0000000..a561b5f
--- /dev/null
+++ b/external/hash/LICENSE
@@ -0,0 +1,28 @@
+This license applies to the content of the current directory.
+
+Some sources are externally provided - see respective file headers.
+All source is MIT or public domain with varying copyright.
+
+Unless otherwise stated, the following license apply:
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/external/hash/PMurHash.c b/external/hash/PMurHash.c
new file mode 100644
index 0000000..7284434
--- /dev/null
+++ b/external/hash/PMurHash.c
@@ -0,0 +1,334 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/*-----------------------------------------------------------------------------
+
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What this version provides?
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix. A partial
+hash of a prefix string can be generated and reused for each suffix string.
+
+2. Portability. Plain old C so that it should compile on any old compiler.
+Both CPU endian and access-alignment neutral, but avoiding inefficient code
+when possible depending on CPU capabilities.
+
+3. Drop in. I personally like nice self contained public domain code, making it
+easy to pilfer without loads of refactoring to work properly in the existing
+application code & makefile structure and mucking around with licence files.
+Just copy PMurHash.h and PMurHash.c and you're ready to go.
+
+
+How does it work?
+
+We can only process entire 32 bit chunks of input, except for the very end
+that may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 3 bytes that we were unable to process.
+This carry also needs to record the number of bytes the carry holds. I use
+the low 2 bits as a count (0..3) and the carry bytes are shifted into the
+high byte in stream order.
+
+To handle endianess I simply use a macro that reads a uint32_t and define
+that macro to be a direct read on little endian machines, a read and swap
+on big endian machines, or a byte-by-byte read if the endianess is unknown.
+
+-----------------------------------------------------------------------------*/
+
+
+#include "PMurHash.h"
+
+/* I used ugly type names in the header to avoid potential conflicts with
+ * application or system typedefs & defines. Since I'm not including any more
+ * headers below here I can rename these so that the code reads like C99 */
+#undef uint32_t
+#define uint32_t MH_UINT32
+#undef uint8_t
+#define uint8_t MH_UINT8
+
+/* MSVC warnings we choose to ignore */
+#if defined(_MSC_VER)
+ #pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Endianess, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * READ_UINT32(x) Read a little endian unsigned 32-bit int
+ * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries
+ * ROTL32(x,r) Rotate x left by r bits
+ */
+
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+ #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+ #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #define UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+ #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+ #define __BYTE_ORDER __BIG_ENDIAN
+ #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */
+#if !defined(__BYTE_ORDER)
+ #if defined(__ARMEL__) || defined(__MIPSEL__)
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #endif
+ #if defined(__ARMEB__) || defined(__MIPSEB__)
+ #define __BYTE_ORDER __BIG_ENDIAN
+ #endif
+#endif
+
+/* Now find best way we can to READ_UINT32.
+ * The byte-wise fallbacks widen every byte to uint32_t before shifting:
+ * a bare `ptr[3]<<24` shifts a promoted (signed) int, which is undefined
+ * when the byte is >= 0x80. The argument is parenthesised so that pointer
+ * expressions expand safely. */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+ /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+ #define READ_UINT32(ptr) (*((uint32_t*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+ /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+ #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+ #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr))))
+ #else
+ /* Without a known fast bswap32 we're just as well off doing this */
+ #define READ_UINT32(ptr) ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+ #define UNALIGNED_SAFE
+ #endif
+#else
+ /* Unknown endianess so last resort is to read individual bytes */
+ #define READ_UINT32(ptr) ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+
+ /* Since we're not doing word-reads we can skip the messing about with realignment */
+ #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+ #include <stdlib.h> /* Microsoft put _rotl declaration in here */
+ #define ROTL32(x,r) _rotl(x,r)
+#else
+ /* gcc recognises this code and generates a rotate instruction for CPUs with one.
+  * Arguments are parenthesised so expression arguments expand as intended;
+  * r must be in 1..31 (r == 0 would shift right by 32, which is undefined). */
+ #define ROTL32(x,r) (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define C1 (0xcc9e2d51)
+#define C2 (0x1b873593)
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 32-bits of input. */
+#define DOBLOCK(h1, k1) do{ \
+ k1 *= C1; \
+ k1 = ROTL32(k1,15); \
+ k1 *= C2; \
+ \
+ h1 ^= k1; \
+ h1 = ROTL32(h1,13); \
+ h1 = h1*5+0xe6546b64; \
+ }while(0)
+
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
+/* The byte is widened to uint32_t before the shift: shifting the promoted
+ * (signed) int left by 24 is undefined for byte values >= 0x80. */
+#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
+ int _i = cnt; \
+ while(_i--) { \
+ c = c>>8 | (uint32_t)*ptr++<<24; \
+ n++; len--; \
+ if(n==4) { \
+ DOBLOCK(h1, c); \
+ n = 0; \
+ } \
+ } }while(0)
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
+ * if wanted. Both ph1 and pcarry are required arguments.
+ *
+ * ph1    - in/out: running hash state; read on entry, written back on return.
+ * pcarry - in/out: carry word; its low 2 bits hold the number of pending
+ *          bytes (0..3) and the pending bytes are kept in the upper bytes.
+ * key    - start of the next piece of input.
+ * len    - number of bytes available at key.
+ *
+ * The function may be called repeatedly to feed the input in pieces; the
+ * resulting h1/carry pair is finished with PMurHash32_Result(). */
+void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
+{
+ uint32_t h1 = *ph1;
+ uint32_t c = *pcarry;
+
+ const uint8_t *ptr = (uint8_t*)key;
+ const uint8_t *end;
+
+ /* Extract carry count from low 2 bits of c value */
+ int n = c & 3;
+
+#if defined(UNALIGNED_SAFE)
+ /* This CPU handles unaligned word access */
+
+ /* Consume any carry bytes */
+ int i = (4-n) & 3;
+ if(i && i <= len) {
+ DOBYTES(i, h1, c, n, ptr, len);
+ }
+
+ /* Process 32-bit chunks */
+ end = ptr + len/4*4;
+ for( ; ptr < end ; ptr+=4) {
+ uint32_t k1 = READ_UINT32(ptr);
+ DOBLOCK(h1, k1);
+ }
+
+#else /*UNALIGNED_SAFE*/
+ /* This CPU does not handle unaligned word access */
+
+ /* Consume enough so that the next data byte is word aligned */
+ int i = -(long)ptr & 3;
+ if(i && i <= len) {
+ DOBYTES(i, h1, c, n, ptr, len);
+ }
+
+ /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+ end = ptr + len/4*4;
+ switch(n) { /* how many bytes in c */
+ case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */
+ for( ; ptr < end ; ptr+=4) {
+ uint32_t k1 = READ_UINT32(ptr);
+ DOBLOCK(h1, k1);
+ }
+ break;
+ case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */
+ for( ; ptr < end ; ptr+=4) {
+ uint32_t k1 = c>>24;
+ c = READ_UINT32(ptr);
+ k1 |= c<<8;
+ DOBLOCK(h1, k1);
+ }
+ break;
+ case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */
+ for( ; ptr < end ; ptr+=4) {
+ uint32_t k1 = c>>16;
+ c = READ_UINT32(ptr);
+ k1 |= c<<16;
+ DOBLOCK(h1, k1);
+ }
+ break;
+ case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */
+ for( ; ptr < end ; ptr+=4) {
+ uint32_t k1 = c>>8;
+ c = READ_UINT32(ptr);
+ k1 |= c<<24;
+ DOBLOCK(h1, k1);
+ }
+ }
+#endif /*UNALIGNED_SAFE*/
+
+ /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
+ len -= len/4*4;
+
+ /* Append any remaining bytes into carry */
+ DOBYTES(len, h1, c, n, ptr, len);
+
+ /* Copy out new running hash and carry */
+ *ph1 = h1;
+ *pcarry = (c & ~0xff) | n; /* low byte is replaced by the pending-byte count */
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Finalize a hash. To match the original Murmur3A the total_length must be provided.
+ * h and carry are the state left behind by PMurHash32_Process(). Any 1..3
+ * pending carry bytes are mixed in first, then total_length, then the fmix
+ * steps are applied. Returns the finished 32-bit hash. */
+uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
+{
+ uint32_t k1;
+ int n = carry & 3; /* number of pending bytes held in carry */
+ if(n) {
+ k1 = carry >> (4-n)*8; /* move the n pending bytes down to the low end */
+ k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
+ }
+ h ^= total_length;
+
+ /* fmix */
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Murmur3A compatible all-at-once: hashes the len bytes at key, starting from
+ * seed, with a single PMurHash32_Process() call followed by
+ * PMurHash32_Result(). Returns the 32-bit hash. */
+uint32_t PMurHash32(uint32_t seed, const void *key, int len)
+{
+ uint32_t h1=seed, carry=0;
+ PMurHash32_Process(&h1, &carry, key, len);
+ return PMurHash32_Result(h1, carry, len);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Provide an API suitable for smhasher: hashes the len bytes at key with seed
+ * and stores the 32-bit result through out. The disabled (#if 0) branch feeds
+ * the input in randomly sized pieces to exercise progressive processing. */
+void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
+{
+ uint32_t h1=seed, carry=0;
+ const uint8_t *ptr = (uint8_t*)key;
+ const uint8_t *end = ptr + len;
+
+#if 0 /* Exercise the progressive processing */
+ while(ptr < end) {
+ //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
+ const uint8_t *mid = ptr + (rand()&0xF);
+ mid = mid<end?mid:end;
+ PMurHash32_Process(&h1, &carry, ptr, mid-ptr);
+ ptr = mid;
+ }
+#else
+ PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr));
+#endif
+ h1 = PMurHash32_Result(h1, carry, len);
+ *(uint32_t*)out = h1; /* NOTE(review): assumes out is suitably aligned for a 32-bit store - confirm with callers */
+}
+
+/*---------------------------------------------------------------------------*/
+#ifdef TEST
+int main() {
+ // Replays the seed and key from a CBMC report (http://www.cprover.org/cbmc/):
+ // cbmc PMurHash.c --function PMurHash32 --unwind 255 --bounds-check --pointer-check
+ //=> seed=308736u (00000000000001001011011000000000)
+ // key=INVALID-128 (1000000011111111111111111111111111111111111111111111110101100111)
+ // len=640
+ // Violated property:
+ //file PMurHash.c line 201 function PMurHash32_Process
+ //dereference failure: object bounds
+ //!(POINTER_OFFSET(ptr) < 0) && OBJECT_SIZE(ptr) >= 1 + POINTER_OFFSET(ptr) || DYNAMIC_OBJECT(ptr)
+
+ uint32_t seed = 308736;
+ unsigned long long key = 0x80fffffffffffd67ULL; // same bit pattern as the reported key
+ PMurHash32(seed, &key, sizeof(key)); // hashes sizeof(key) bytes, not the reported len=640
+}
+#endif
diff --git a/external/hash/PMurHash.h b/external/hash/PMurHash.h
new file mode 100644
index 0000000..28ead00
--- /dev/null
+++ b/external/hash/PMurHash.h
@@ -0,0 +1,64 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/* ------------------------------------------------------------------------- */
+/* Determine what native type to use for uint32_t */
+
+/* We can't use the name 'uint32_t' here because it will conflict with
+ * any version provided by the system headers or application. */
+
+/* First look for special cases */
+#if defined(_MSC_VER)
+ #define MH_UINT32 unsigned long
+#endif
+
+/* If the compiler says it's C99 then take its word for it */
+#if !defined(MH_UINT32) && ( \
+ defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L )
+ #include <stdint.h>
+ #define MH_UINT32 uint32_t
+#endif
+
+/* Otherwise try testing against max value macros from limits.h */
+#if !defined(MH_UINT32)
+ #include <limits.h>
+ #if (USHRT_MAX == 0xffffffffUL)
+ #define MH_UINT32 unsigned short
+ #elif (UINT_MAX == 0xffffffffUL)
+ #define MH_UINT32 unsigned int
+ #elif (ULONG_MAX == 0xffffffffUL)
+ #define MH_UINT32 unsigned long
+ #endif
+#endif
+
+#if !defined(MH_UINT32)
+ #error Unable to determine type name for unsigned 32-bit int
+#endif
+
+/* I'm yet to work on a platform where 'unsigned char' is not 8 bits */
+#define MH_UINT8 unsigned char
+
+
+/* ------------------------------------------------------------------------- */
+/* Prototypes */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len);
+MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length);
+MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len);
+
+void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/external/hash/README.md b/external/hash/README.md
new file mode 100644
index 0000000..d0f03e8
--- /dev/null
+++ b/external/hash/README.md
@@ -0,0 +1,158 @@
+Generic hash table implementation with focus on being minimally
+invasive on existing items to be indexed.
+
+The key is stored arbitrarily in the referenced item. A custom match
+function `HT_MATCH` provides the necessary abstraction. Items are
+NOT allocated by the hash table.
+
+Removed items are replaced with a sentinel value (1) to preserve
+chaining.
+
+See the example implementations `hash_set.h`, `hash_item_table.h`,
+and `hash_test.c`.
+
+The hash function can also be customized, see the default below.
+
+In all cases the key is assumed to be a char string that is not
+(necessarily) zero terminated. The length is given separately. Keys
+can therefore be arbitrary binary values of arbitrary length.
+
+Instead of initializing the hash table, it may be zeroed. In that
+case the count defaults to 4 upon first insert, meaning it can hold
+up to 4 items before resizing or less depending on load factor. By
+zeroing memory, hash tables use no memory until actually used.
+
+For increased portability we do not rely upon `stdint.h` outside the
+default hash function.
+
+Build
+-----
+
+There are no special build requirements.
+
+CMakeLists.txt simply links the appropriate hash function with the test
+files, but CMake is not required, for example:
+
+ cc load_test.c ptr_set.c cmetrohash64.c -O4 -DNDEBUG -o load_test
+
+There are several significant flags that can be set, but look at
+`CMakeLists.txt`, `hash_test.c`, and `load_test.c`.
+
+`initbuild.sh` is an easy way to create a CMake Ninja build for
+platforms that support it.
+
+Usage
+-----
+
+The hash table is implemented in a generic form with a static (private)
+interface. The macros
+
+`HASH_TABLE_HEADER(name, item)` defines the public prototype for the
+specialized type, and `HASH_TABLE_API(name)` defines non-static wrapper
+functions to access the generic implementation. This avoids creating all
+the code as macros which are painful to develop and debug.
+
+See `token_map.h`, `token_map.c` which are used in `hash_test.c`.
+
+If the datatype is only needed in one file, the implementation such as
+`token_map.c` can be included after defining `HT_PRIVATE`. This gives
+the compiler better optimization opportunities and hides the interface
+from other compilation units.
+
+The basic datatype `hash_table_t` is a small struct that can be embedded
+anywhere and used as the instance of any hash table derived type.
+
+
+Note on algorithmic choice
+--------------------------
+
+We use linear or quadratic probing hash tables because it allows for
+many small hash tables. We overallocate the hash table by a factor 2
+(default) but only store a single pointer per item. This probing does
+not allow for dense tables by itself, but because the hash table only
+stores a single pointer per bucket, we can afford a larger table.
+Advanced hashing such as Hopscotch can pack much more densely but
+e.g. Hopscotch needs to store a bitmask, thus already doubling the
+size of the table. Hopscotch is probably good, but more complex and
+depends on optimizing bit scan instructions, furthermore, when the use
+case is many small tables such as symbol table scopes, cache locality
+is less relevant. Chained hashing with 50% load factor is a good
+choice, but require intrusive links, and cannot trivially hash string
+sets without extra allocation. There is some evidence that linear
+probing may be faster than quadratic probing due to cache effects, as
+long as we do not pack too densely - however, the traditional quadratic
+probing (k + i * i) modulo prime does not cover all buckets. We use
+(k + i * (i + 1) / 2) modulo power of 2 which covers all buckets so
+without experimentation it is unclear whether linear probing or
+quadratic probing is best.
+
+The use of open addressing leads to more key comparisons than chained
+hashing. The fact we store the keys indirectly in the stored item is
+also not ideal, except when the item is also directly the key. If we
+use larger hash tables from the saved space, we suspect this will
+still perform well, also considering external factors such as not
+having to allocate and copy a key from e.g. a text buffer being
+parsed.
+
+It is generally understood that linear probing degrades significantly
+with a load factor above 0.7. In this light, it is interesting to note
+that Emmanuel Goossaert tested hopscotch hashing and found that bucket
+swaps only take place in significance above a load factor of 0.7. A
+commenter to Goossaert's blog also found that neighbourhoods rarely
+exceed 64 even when allowed to grow on demand. Without deep analysis
+it would appear that linear probing and hopscotch are pretty similar
+at a load factor of 0.5 especially if tombstones are not present.
+Because hopscotch requires extra data (e.g. the hash key or a bitmap
+or a linked list) this confirms our intuition that it is better with
+lower load factors and smaller buckets, than advanced algorithms.
+Furthermore, hopscotch insert degrades badly when it needs to search for
+empty buckets at high load factors. Of course, for on disk storage
+it is a different matter, and this is why Goossaert is interested
+in caching hash keys in buckets.
+
+Robin Hood hashing is mostly interesting when there are many deletions
+to clean up and when the load factor increases. In our implementation we
+try to keep the per bucket size small: a pointer and an 8 bit offset, or
+just a pointer for the linear and quadratic probing implementations.
+This makes it affordable with a lower load factor.
+
+This Robin Hood variation stores the offset from the hashed bucket to
+where the first entry is stored. This means we can avoid sampling any
+bucket not indexed by the current hash key, and it also means that we
+avoid having to store or calculate the hash key when updating.
+
+A sorted Robin Hood hashing implementation was also made, but it proved
+to be error prone with many special cases and slower than regular Robin
+Hood hashing. It would conceivably protect against hash collision
+attacks through exponential search, but insertions and deletions would
+still need to move memory in linear time, making this point moot.
+Therefore the sorted Robin Hood variant has been removed.
+
+
+Section 4.5:
+<http://codecapsule.com/2014/05/07/implementing-a-key-value-store-part-6-open-addressing-hash-tables/>
+
+<http://codecapsule.com/2013/08/11/hopscotch-hashing/>
+
+Source file references
+----------------------
+
+<http://www.jandrewrogers.com/2015/05/27/metrohash/>
+
+downloaded from
+
+ <https://github.com/rurban/smhasher>
+ <https://github.com/rurban/smhasher/commit/00a4e5ab6bfb7b25bd3c7cf915f68984d4910cfd>
+
+ <https://raw.githubusercontent.com/rurban/smhasher/master/cmetrohash64.c>
+ <https://raw.githubusercontent.com/rurban/smhasher/master/cmetrohash.h>
+ <https://raw.githubusercontent.com/rurban/smhasher/master/PMurHash.c>
+ <https://raw.githubusercontent.com/rurban/smhasher/master/PMurHash.h>
+
+As of July 2015, for 64-bit hashes, the C port of the 64 bit metro hash
+is a good trade-off between speed and simplicity. For a 32-bit C hash
+function, the ported MurmurHash3 is safe and easy to use in this
+environment, but xxHash32 may also be worth considering.
+
+See also <http://www.strchr.com/hash_functions>
+
diff --git a/external/hash/cmetrohash.h b/external/hash/cmetrohash.h
new file mode 100644
index 0000000..b2c869a
--- /dev/null
+++ b/external/hash/cmetrohash.h
@@ -0,0 +1,78 @@
+// metrohash.h
+//
+// The MIT License (MIT)
+//
+// Copyright (c) 2015 J. Andrew Rogers
+//
+// Updated Nov. 2015 to use safe unaligned reads and platform neutral
+// hash. This WILL change hashes on big endian platforms. / mikkelfj
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef CMETROHASH_METROHASH_H
+#define CMETROHASH_METROHASH_H
+
+#include "ht_portable.h"
+#include "unaligned.h"
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+// MetroHash 64-bit hash functions
+void cmetrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
+void cmetrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
+
+
+/* rotate right idiom recognized by compiler; k must be in 1..63, since
+ * k == 0 would make the left shift a shift by 64, which is undefined for a
+ * 64-bit value */
+inline static uint64_t crotate_right(uint64_t v, unsigned k)
+{
+ return (v >> k) | (v << (64 - k));
+}
+
+/* Read helpers: each returns the value found at ptr widened to uint64_t.
+ * The multi-byte variants delegate to the unaligned_read_le*toh helpers
+ * (presumably provided by unaligned.h, included above). */
+inline static uint64_t cread_u64(const void * const ptr)
+{
+ return (uint64_t)unaligned_read_le64toh(ptr);
+}
+
+inline static uint64_t cread_u32(const void * const ptr)
+{
+ return (uint64_t)unaligned_read_le32toh(ptr);
+}
+
+inline static uint64_t cread_u16(const void * const ptr)
+{
+ return (uint64_t)unaligned_read_le16toh(ptr);
+}
+
+inline static uint64_t cread_u8 (const void * const ptr)
+{
+ /* the byte is only read, so keep the const qualifier in the cast */
+ return * (const uint8_t *) ptr;
+}
+
+#if defined (__cplusplus)
+}
+#endif
+#endif // #ifndef CMETROHASH_METROHASH_H
diff --git a/external/hash/cmetrohash64.c b/external/hash/cmetrohash64.c
new file mode 100644
index 0000000..2923958
--- /dev/null
+++ b/external/hash/cmetrohash64.c
@@ -0,0 +1,185 @@
+// metrohash64.cpp
+//
+// The MIT License (MIT)
+//
+// Copyright (c) 2015 J. Andrew Rogers
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "cmetrohash.h"
+
+
+void cmetrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
+{ // hashes the len bytes at key with seed; the 64-bit result is copied to out (8 bytes)
+ static const uint64_t k0 = 0xC83A91E1;
+ static const uint64_t k1 = 0x8648DBDB;
+ static const uint64_t k2 = 0x7BDEC03B;
+ static const uint64_t k3 = 0x2F5870A5;
+
+ const uint8_t * ptr = key;
+ const uint8_t * const end = ptr + len;
+
+ uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len;
+
+ if (len >= 32) // bulk phase: four lanes absorb 32 bytes per iteration
+ {
+ uint64_t v[4];
+ v[0] = hash;
+ v[1] = hash;
+ v[2] = hash;
+ v[3] = hash;
+
+ do
+ {
+ v[0] += cread_u64(ptr) * k0; ptr += 8; v[0] = crotate_right(v[0],29) + v[2];
+ v[1] += cread_u64(ptr) * k1; ptr += 8; v[1] = crotate_right(v[1],29) + v[3];
+ v[2] += cread_u64(ptr) * k2; ptr += 8; v[2] = crotate_right(v[2],29) + v[0];
+ v[3] += cread_u64(ptr) * k3; ptr += 8; v[3] = crotate_right(v[3],29) + v[1];
+ }
+ while (ptr <= (end - 32)); // continue while a full 32-byte block remains
+
+ v[2] ^= crotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
+ v[3] ^= crotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
+ v[0] ^= crotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
+ v[1] ^= crotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
+ hash += v[0] ^ v[1]; // fold the lanes back into the single hash value
+ }
+
+ if ((end - ptr) >= 16) // tail: at most one 16-byte step
+ {
+ uint64_t v0, v1;
+ v0 = hash + (cread_u64(ptr) * k0); ptr += 8; v0 = crotate_right(v0,33) * k1;
+ v1 = hash + (cread_u64(ptr) * k1); ptr += 8; v1 = crotate_right(v1,33) * k2;
+ v0 ^= crotate_right(v0 * k0, 35) + v1;
+ v1 ^= crotate_right(v1 * k3, 35) + v0;
+ hash += v1;
+ }
+
+ if ((end - ptr) >= 8) // tail: 8 bytes
+ {
+ hash += cread_u64(ptr) * k3; ptr += 8;
+ hash ^= crotate_right(hash, 33) * k1;
+
+ }
+
+ if ((end - ptr) >= 4) // tail: 4 bytes
+ {
+ hash += cread_u32(ptr) * k3; ptr += 4;
+ hash ^= crotate_right(hash, 15) * k1;
+ }
+
+ if ((end - ptr) >= 2) // tail: 2 bytes
+ {
+ hash += cread_u16(ptr) * k3; ptr += 2;
+ hash ^= crotate_right(hash, 13) * k1;
+ }
+
+ if ((end - ptr) >= 1) // tail: last byte
+ {
+ hash += cread_u8 (ptr) * k3;
+ hash ^= crotate_right(hash, 25) * k1;
+ }
+
+ hash ^= crotate_right(hash, 33); // final mixing
+ hash *= k0;
+ hash ^= crotate_right(hash, 33);
+
+ memcpy(out, &hash, 8); // copies the 8 bytes of hash as stored in memory
+}
+
+
+void cmetrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
+{ // same structure as cmetrohash64_1 with different constants and rotation counts
+ static const uint64_t k0 = 0xD6D018F5;
+ static const uint64_t k1 = 0xA2AA033B;
+ static const uint64_t k2 = 0x62992FC1;
+ static const uint64_t k3 = 0x30BC5B29;
+
+ const uint8_t * ptr = key;
+ const uint8_t * const end = ptr + len;
+
+ uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len;
+
+ if (len >= 32) // bulk phase: four lanes absorb 32 bytes per iteration
+ {
+ uint64_t v[4];
+ v[0] = hash;
+ v[1] = hash;
+ v[2] = hash;
+ v[3] = hash;
+
+ do
+ {
+ v[0] += cread_u64(ptr) * k0; ptr += 8; v[0] = crotate_right(v[0],29) + v[2];
+ v[1] += cread_u64(ptr) * k1; ptr += 8; v[1] = crotate_right(v[1],29) + v[3];
+ v[2] += cread_u64(ptr) * k2; ptr += 8; v[2] = crotate_right(v[2],29) + v[0];
+ v[3] += cread_u64(ptr) * k3; ptr += 8; v[3] = crotate_right(v[3],29) + v[1];
+ }
+ while (ptr <= (end - 32)); // continue while a full 32-byte block remains
+
+ v[2] ^= crotate_right(((v[0] + v[3]) * k0) + v[1], 30) * k1;
+ v[3] ^= crotate_right(((v[1] + v[2]) * k1) + v[0], 30) * k0;
+ v[0] ^= crotate_right(((v[0] + v[2]) * k0) + v[3], 30) * k1;
+ v[1] ^= crotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0;
+ hash += v[0] ^ v[1]; // fold the lanes back into the single hash value
+ }
+
+ if ((end - ptr) >= 16) // tail: at most one 16-byte step
+ {
+ uint64_t v0, v1;
+ v0 = hash + (cread_u64(ptr) * k2); ptr += 8; v0 = crotate_right(v0,29) * k3;
+ v1 = hash + (cread_u64(ptr) * k2); ptr += 8; v1 = crotate_right(v1,29) * k3;
+ v0 ^= crotate_right(v0 * k0, 34) + v1;
+ v1 ^= crotate_right(v1 * k3, 34) + v0;
+ hash += v1;
+ }
+
+ if ((end - ptr) >= 8) // tail: 8 bytes
+ {
+ hash += cread_u64(ptr) * k3; ptr += 8;
+ hash ^= crotate_right(hash, 36) * k1;
+ }
+
+ if ((end - ptr) >= 4) // tail: 4 bytes
+ {
+ hash += cread_u32(ptr) * k3; ptr += 4;
+ hash ^= crotate_right(hash, 15) * k1;
+ }
+
+ if ((end - ptr) >= 2) // tail: 2 bytes
+ {
+ hash += cread_u16(ptr) * k3; ptr += 2;
+ hash ^= crotate_right(hash, 15) * k1;
+ }
+
+ if ((end - ptr) >= 1) // tail: last byte
+ {
+ hash += cread_u8 (ptr) * k3;
+ hash ^= crotate_right(hash, 23) * k1;
+ }
+
+ hash ^= crotate_right(hash, 28); // final mixing
+ hash *= k0;
+ hash ^= crotate_right(hash, 29);
+
+ memcpy(out, &hash, 8); // copies the 8 bytes of hash as stored in memory
+}
+
+
diff --git a/external/hash/hash.h b/external/hash/hash.h
new file mode 100644
index 0000000..c5a6fc6
--- /dev/null
+++ b/external/hash/hash.h
@@ -0,0 +1,115 @@
+#ifndef HASH_H
+#define HASH_H
+
+/* Misc. hash functions that do not comply to a specific interface. */
+
+#include <stdlib.h>
+
+#ifdef _MSC_VER
+/* `inline` only advisory anyway. */
+#pragma warning(disable: 4710) /* function not inlined */
+#endif
+
+/*
+ * Folds `len` bytes starting at `buf` into a running 32-bit FNV-1a hash
+ * and returns the updated value. The result can be passed back in as
+ * `seed` to continue hashing more data.
+ *
+ * With FNV1A_NOMUL defined, the multiplication by the prime 0x1000193
+ * is replaced by the equivalent sum of shifts.
+ */
+static inline uint32_t hash_fnv1a32_update(uint32_t seed, uint8_t *buf, size_t len)
+{
+    /* Accumulate in 64 bits; only the low 32 bits are returned. */
+    uint64_t acc = seed;
+    size_t i;
+
+    for (i = 0; i < len; ++i) {
+        acc ^= (uint64_t)buf[i];
+#ifndef FNV1A_NOMUL
+        acc *= (uint64_t)UINT32_C(0x1000193);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 7) +
+                (acc << 8) + (acc << 24);
+#endif
+    }
+    return (uint32_t)acc;
+}
+
+/*
+ * Hashes `len` bytes at `buf` with 32-bit FNV-1a, starting from the
+ * initial value 0x811c9dc5.
+ */
+static inline uint32_t hash_fnv1a32(uint8_t *buf, size_t len)
+{
+    const uint32_t initial = UINT32_C(0x811c9dc5);
+
+    return hash_fnv1a32_update(initial, buf, len);
+}
+
+/*
+ * Folds `len` bytes starting at `buf` into a running 64-bit FNV-1a hash
+ * `v` and returns the updated value. The result can be passed back in
+ * as `v` to continue hashing more data.
+ *
+ * With FNV1A_NOMUL defined, the multiplication by the prime
+ * 0x100000001b3 is replaced by the equivalent sum of shifts.
+ */
+static inline uint64_t hash_fnv1a64_update(uint64_t v, uint8_t *buf, size_t len)
+{
+    uint64_t acc = v;
+    size_t i;
+
+    for (i = 0; i < len; ++i) {
+        acc ^= (uint64_t)buf[i];
+#ifndef FNV1A_NOMUL
+        acc *= UINT64_C(0x100000001b3);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 5) +
+                (acc << 7) + (acc << 8) + (acc << 40);
+#endif
+    }
+    return acc;
+}
+
+/*
+ * Hashes `len` bytes at `buf` with 64-bit FNV-1a, starting from the
+ * initial value 0xcbf29ce484222325.
+ */
+static inline uint64_t hash_fnv1a64(uint8_t *buf, size_t len)
+{
+    const uint64_t initial = UINT64_C(0xcbf29ce484222325);
+
+    return hash_fnv1a64_update(initial, buf, len);
+}
+
+/*
+ * MurmurHash 3 final mix with seed to handle 0.
+ *
+ * Width is number of bits of the value to return.
+ * http://stackoverflow.com/a/12996028
+ *
+ * The mixed value is reduced to its `width` most significant bits.
+ * A width of 0 returns 0, and a width of 32 or more returns the full
+ * mixed value. Both cases are handled explicitly because shifting a
+ * 32-bit value by 32 or more bits is undefined behavior in C.
+ */
+static inline uint32_t hash_bucket32(uint32_t v, size_t width)
+{
+    uint32_t x = v + UINT32_C(0x2f693b52);
+
+    x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b);
+    x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b);
+    x = ((x >> 16) ^ x);
+    if (width == 0) {
+        return 0;
+    }
+    if (width >= 32) {
+        return x;
+    }
+    return x >> (32 - width);
+}
+
+/*
+ * SplitMix64 - can be used to disperse fnv1a hash, to hash
+ * an integer, or as a simple non-cryptographic prng.
+ *
+ * Width is number of bits of the value to return.
+ * http://stackoverflow.com/a/12996028
+ *
+ * The mixed value is reduced to its `width` most significant bits.
+ * A width of 0 returns 0, and a width of 64 or more returns the full
+ * mixed value. Both cases are handled explicitly because shifting a
+ * 64-bit value by 64 or more bits is undefined behavior in C.
+ */
+static inline uint64_t hash_bucket64(uint64_t v, size_t width)
+{
+    uint64_t x = v + UINT64_C(0x9e3779b97f4a7c15);
+
+    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
+    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
+    x = x ^ (x >> 31);
+    if (width == 0) {
+        return 0;
+    }
+    if (width >= 64) {
+        return x;
+    }
+    return x >> (64 - width);
+}
+
+/*
+ * Advances `*state` by one hash_bucket64 step at the full 64-bit width
+ * and returns the new state as the next pseudo-random value.
+ */
+static inline uint64_t hash_random64(uint64_t *state)
+{
+    const uint64_t next = hash_bucket64(*state, 64);
+
+    *state = next;
+    return next;
+}
+
+/*
+ * Faster, less random hash bucket compared to hash_bucket32, but works
+ * for smaller integers.
+ *
+ * Returns the `width` most significant bits of the product. A width of
+ * 0 returns 0, and a width of 32 or more returns the full product.
+ * Both cases are handled explicitly because shifting a 32-bit value by
+ * 32 or more bits is undefined behavior in C.
+ */
+static inline uint32_t hash_mult32(uint32_t v, size_t width)
+{
+    /* Knuth's multiplicative hash. */
+    const uint32_t product = v * UINT32_C(2654435761);
+
+    if (width == 0) {
+        return 0;
+    }
+    if (width >= 32) {
+        return product;
+    }
+    return product >> (32 - width);
+}
+
+#endif /* HASH_H */
diff --git a/external/hash/hash_table.h b/external/hash/hash_table.h
new file mode 100644
index 0000000..5c3e9cd
--- /dev/null
+++ b/external/hash/hash_table.h
@@ -0,0 +1,266 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HASH_TABLE_H
+#define HASH_TABLE_H
+
+#include "ht_portable.h"
+#include <stddef.h>
+
+/*
+ * Define HT_PRIVATE to make all name wrapping interface functions static
+ * inline when including hash implementation directly in user code. This
+ * can increase performance significantly (3x) on small hash tables with
+ * fast hash functions because the compiler can better optimize static
+ * functions. Some compiler optimizations will get the same speed
+ * with external linkage (clang 4.2 -O4 but not -O3).
+ *
+ * Can also be used to simply hide the interface from global
+ * linkage to avoid name clashes.
+ */
+#ifndef HT_PRIVATE
+#define HT_PRIV
+#else
+#define HT_PRIV static inline
+#endif
+
+/*
+ * Generic hash table type. This makes it possible to use hash tables
+ * in datastructures and header files that do not have access to
+ * the specific hash table implementation. Call to init is optional
+ * if the structure is zeroed.
+ *
+ * Offsets are only used with Robin Hood hashing to segment each chain.
+ *
+ * Keys and values are both stored in the same item pointer. There are
+ * downsides to this over a key / value representation, but since we also
+ * use less space we can afford to lower the load factor and we can have
+ * more complex key representations. The smaller bucket size also helps
+ * when ordering Robin Hood hash chains.
+ */
+typedef struct hash_table hash_table_t;
+struct hash_table { /* Table state shared by every named specialization. */
+ void *table; /* Bucket array of items; 0 when nothing is allocated. */
+ char *offsets; /* Per-bucket offsets; only the Robin Hood implementation allocates this. */
+ size_t count; /* Number of items currently stored. */
+ /* May be stored as a direct count, or log2. */
+ size_t buckets;
+};
+
+enum hash_table_insert_mode { /* Selects how insert treats an already present key. */
+ ht_replace = 0, /* Replace a stored match and return the old item. */
+ ht_keep = 1, /* Keep a stored match and return it. */
+ ht_unique = 2, /* Caller knows the key is not stored yet. */
+ ht_multi = 3, /* Insert even though a duplicate key might exist. */
+};
+
+/*
+ * This macro defines the prototypes of the hash table that user code
+ * needs for linkage.
+ *
+ * See also "hash_table_def.h" which builds wrapper functions to a
+ * generic hash table implementation so each specialization gets its own
+ * set of named functions.
+ *
+ * The HT_ITEM is normally a pointer, and the hash table does not
+ * store any significant information internally. Customizations map
+ * the item value to a key. Certain values can be reserved, for
+ * example 0 indicates missing value, and sometimes 1 is reserved for
+ * internal tombstones and 2 may be used to return allocation failure.
+ */
+#define DECLARE_HASH_TABLE(HT_NAME, HT_ITEM) \
+ \
+typedef hash_table_t HT_NAME##_t; \
+typedef HT_ITEM HT_NAME##_item_t; \
+ \
+/* Prototype for user supplied callback when visiting all elements. */ \
+typedef void HT_NAME##_visitor_f(void *context, HT_ITEM item); \
+ \
+extern const HT_NAME##_item_t HT_NAME##_missing; \
+extern const HT_NAME##_item_t HT_NAME##_nomem; \
+extern const HT_NAME##_item_t HT_NAME##_deleted; \
+ \
+static inline int HT_NAME##_is_valid(HT_ITEM item) \
+{ \
+ return \
+ item != HT_NAME##_missing && \
+ item != HT_NAME##_nomem && \
+ item != HT_NAME##_deleted; \
+} \
+ \
+static inline int HT_NAME##_is_missing(HT_ITEM item) \
+{ \
+ return item == HT_NAME##_missing; \
+} \
+ \
+static inline int HT_NAME##_is_nomem(HT_ITEM item) \
+{ \
+ return item == HT_NAME##_nomem; \
+} \
+ \
+static inline int HT_NAME##_is_deleted(HT_ITEM item) \
+{ \
+ return item == HT_NAME##_deleted; \
+} \
+ \
+/* \
+ * Allocates enough buckets to represent count elements without resizing. \
+ * The actual number of allocated buckets depends on the load factor \
+ * given as a macro argument in the implementation. The bucket number \
+ * rounds up to the nearest power of 2. \
+ * \
+ * `ht` should not be initialized beforehand, otherwise use resize. \
+ * Alternatively, it is also valid to zero initialize the table by \
+ * other means - this will postpone allocation until needed. \
+ * \
+ * The load factor (template argument) should be positive and at most \
+ * 100%, otherwise insertion and resize cannot succeed. The recommended \
+ * load factor is between 25% and 75%. \
+ * \
+ * Returns 0 on success, -1 on allocation failure or invalid load factor. \
+ */ \
+HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count); \
+ \
+/* \
+ * Clears the allocated memory. Optionally takes a destructor \
+ * that will visit all items. \
+ * The table struct may be reused after being destroyed. \
+ * May also be called on a zero initialised hash table. \
+ * \
+ * Can be called in place of clear for more control. \
+ */ \
+HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \
+ HT_NAME##_visitor_f *destructor, void *context); \
+ \
+/* \
+ * Clears the allocated memory, but does not manage memory or state of any \
+ * stored items. It is a simpler version of destroy. \
+ */ \
+HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht); \
+ \
+/* \
+ * Resizes the hash table to hold at least `count` elements. \
+ * The actual number of allocated buckets is a strictly larger power of \
+ * two. If `count` is smaller than the current number of elements, \
+ * that number is used instead of count. Thus, resize(ht, 0) may be \
+ * used to reduce the table size after a spike. \
+ * The function is called automatically as elements are inserted, \
+ * but shrinking the table should be done manually. \
+ * \
+ * If resizing to same size, table is still reallocated but will then \
+ * clean up old tombstones from excessive deletion. \
+ * \
+ * Returns 0 on success, -1 on allocation failure. \
+ */ \
+HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count); \
+ \
+/* \
+ * Inserts an item pointer in one of the following modes: \
+ * \
+ * ht_keep: \
+ * If the key exists, the stored item is kept and returned, \
+ * otherwise it is inserted and null is returned. \
+ * \
+ * ht_replace: \
+ * If the key exists, the stored item is replaced and the old \
+ * item is returned, otherwise the item is inserted and null \
+ * is returned. \
+ * \
+ * ht_unique: \
+ * Inserts an item without checking if a key exists. Always returns \
+ * null. This is faster when it is known that the key does not exist. \
+ * \
+ * ht_multi: \
+ * Same as ht_unique but with the intention that a duplicate key \
+ * might exist. This should not be abused because not all hash table \
+ * implementations work well with too many collisions. Robin Hood \
+ * hashing might reallocate aggressively to keep the chain length \
+ * down. Linear and Quadratic probing do handle this, albeit slowly. \
+ * \
+ * The inserted item cannot have the value HT_MISSING and depending on \
+ * implementation also not HT_DELETED and HT_NOMEM, but the \
+ * definitions are type specific. \
+ */ \
+HT_PRIV HT_ITEM HT_NAME##_insert(HT_NAME##_t *ht, \
+ const void *key, size_t len, HT_ITEM item, int mode); \
+ \
+/* Similar to insert, but derives key from item. */ \
+HT_PRIV HT_ITEM HT_NAME##_insert_item(HT_NAME##_t *ht, \
+ HT_ITEM item, int mode); \
+ \
+/* \
+ * Finds the first matching item if any, or returns null. \
+ * If there are duplicate keys, the first inserted is returned. \
+ */ \
+HT_PRIV HT_ITEM HT_NAME##_find(HT_NAME##_t *ht, \
+ const void *key, size_t len); \
+ \
+/* \
+ * Removes first inserted item that match the given key, if any. \
+ * Returns the removed item if any, otherwise null. \
+ */ \
+HT_PRIV HT_ITEM HT_NAME##_remove(HT_NAME##_t *ht, \
+ const void *key, size_t len); \
+ \
+/* \
+ * Finds an item that compares the same as the given item but it is \
+ * not necessarily the same item if it either isn't stored, or if \
+ * there are duplicates in the table. \
+ */ \
+HT_PRIV HT_ITEM HT_NAME##_find_item(HT_NAME##_t *ht, HT_ITEM item); \
+ \
+/* \
+ * This removes the first item that matches the given item, not \
+ * necessarily the item itself, and the item need not be present \
+ * in the table. Even if the item is in fact removed, it may still \
+ * be present if stored multiple times through abuse of the \
+ * ht_unique or ht_multi insert modes. \
+ */ \
+HT_PRIV HT_ITEM HT_NAME##_remove_item(HT_NAME##_t *ht, HT_ITEM item); \
+ \
+/* \
+ * Calls a function for every item in the hash table. This may be \
+ * used for destructing items, provided the table is not accessed \
+ * subsequently. In fact, the destroy function takes an \
+ * optional visitor that does exactly that. \
+ * \
+ * The function is linear of the allocated hash table size, so will be \
+ * inefficient if the hash table was resized much larger than the number \
+ * of stored items. In that case it is better to store links in the \
+ * items. For the default resizing, the function is reasonably fast \
+ * because for cache reasons it is very fast to exclude empty elements. \
+ */ \
+HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \
+ HT_NAME##_visitor_f *visitor, void *context); \
+ \
+/* \
+ * Returns number of elements in the table. (Not necessarily the number of \
+ * unique keys.) \
+ */ \
+static inline size_t HT_NAME##_count(HT_NAME##_t *ht) \
+{ \
+ return ht->count; \
+} \
+
+#endif /* HASH_TABLE_H */
diff --git a/external/hash/hash_table_def.h b/external/hash/hash_table_def.h
new file mode 100644
index 0000000..5362d47
--- /dev/null
+++ b/external/hash/hash_table_def.h
@@ -0,0 +1,154 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HASH_TABLE_DEF_H
+#define HASH_TABLE_DEF_H
+
+#include "ht_hash_function.h"
+#ifndef HT_HASH_FUNCTION
+/*
+ * If the default hash function is used, make sure to link with the
+ * appropriate hash implementation file.
+ */
+#define HT_HASH_FUNCTION ht_default_hash_function
+#endif
+
+#ifndef HT_LOAD_FACTOR /* Default load factor unless the includer defines one. */
+#define HT_LOAD_FACTOR 0.7
+#endif
+ /* Load factor scaled to 1/256 units so bucket thresholds use integer math. */
+#define HT_LOAD_FACTOR_FRAC ((size_t)((float)(HT_LOAD_FACTOR)*256))
+
+#ifndef HT_PANIC /* Default panic handler: print the message to stderr and exit with status 1. */
+#include <stdio.h>
+#define HT_PANIC(s) { fprintf(stderr, "aborting on panic: %s\n", s); exit(1); }
+#endif
+
+#ifndef HT_MISSING /* Default item value for an empty bucket or "not found". */
+#define HT_MISSING ((ht_item_t)0)
+#endif
+
+#ifndef HT_NOMEM /* Default item value returned by insert when a resize fails. */
+#define HT_NOMEM ((ht_item_t)1)
+#endif
+
+#ifndef HT_DELETED /* Default tombstone value written by remove in the open addressing implementation. */
+#define HT_DELETED ((ht_item_t)2)
+#endif
+
+#define DEFINE_HASH_TABLE(HT_NAME) \
+ \
+/* Fixed local names for the item and visitor types of HT_NAME. */ \
+typedef HT_NAME##_item_t ht_item_t; \
+typedef HT_NAME##_visitor_f ht_visitor_f; \
+ \
+/* Key hooks that the user of this macro must define. */ \
+static inline const void *ht_key(ht_item_t item); \
+static inline size_t ht_key_len(ht_item_t item); \
+static inline int ht_match(const void *key, size_t len, ht_item_t item); \
+ \
+/* Generic operations defined by the included implementation header. */ \
+static int ht_init(hash_table_t *ht, size_t count); \
+static int ht_resize(hash_table_t *ht, size_t count); \
+static void ht_clear(hash_table_t *ht); \
+static void ht_visit(hash_table_t *ht, \
+        ht_visitor_f *visitor, void *context); \
+static ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len); \
+static ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len); \
+static ht_item_t ht_insert(hash_table_t *ht, \
+        const void *key, size_t len, ht_item_t new_item, int mode); \
+ \
+/* Reserved item values exported under the HT_NAME prefix. */ \
+const ht_item_t HT_NAME##_missing = HT_MISSING; \
+const ht_item_t HT_NAME##_nomem = HT_NOMEM; \
+const ht_item_t HT_NAME##_deleted = HT_DELETED; \
+ \
+/* Forwards to ht_init. */ \
+HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count) \
+{ \
+    return ht_init(ht, count); \
+} \
+ \
+/* Forwards to ht_resize. */ \
+HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count) \
+{ \
+    return ht_resize(ht, count); \
+} \
+ \
+/* Forwards to ht_clear. */ \
+HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht) \
+{ \
+    ht_clear(ht); \
+} \
+ \
+/* Visits every item with the destructor, if one is given, then clears. */ \
+HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \
+        HT_NAME##_visitor_f *destructor, void *context) \
+{ \
+    if (destructor != 0) { \
+        ht_visit(ht, destructor, context); \
+    } \
+    ht_clear(ht); \
+} \
+ \
+/* Forwards to ht_insert with an explicit key. */ \
+HT_PRIV ht_item_t HT_NAME##_insert(HT_NAME##_t *ht, \
+        const void *key, size_t len, ht_item_t new_item, int mode) \
+{ \
+    return ht_insert(ht, key, len, new_item, mode); \
+} \
+ \
+/* Inserts using the key that ht_key and ht_key_len derive from the item. */ \
+HT_PRIV ht_item_t HT_NAME##_insert_item(HT_NAME##_t *ht, \
+        ht_item_t item, int mode) \
+{ \
+    return ht_insert(ht, ht_key(item), ht_key_len(item), item, mode); \
+} \
+ \
+/* Forwards to ht_find with an explicit key. */ \
+HT_PRIV ht_item_t HT_NAME##_find(HT_NAME##_t *ht, \
+        const void *key, size_t len) \
+{ \
+    return ht_find(ht, key, len); \
+} \
+ \
+/* Finds using the key that ht_key and ht_key_len derive from the item. */ \
+HT_PRIV ht_item_t HT_NAME##_find_item(HT_NAME##_t *ht, ht_item_t item) \
+{ \
+    return ht_find(ht, ht_key(item), ht_key_len(item)); \
+} \
+ \
+/* Forwards to ht_remove with an explicit key. */ \
+HT_PRIV ht_item_t HT_NAME##_remove(HT_NAME##_t *ht, \
+        const void *key, size_t len) \
+{ \
+    return ht_remove(ht, key, len); \
+} \
+ \
+/* Removes using the key that ht_key and ht_key_len derive from the item. */ \
+HT_PRIV ht_item_t HT_NAME##_remove_item(HT_NAME##_t *ht, ht_item_t item) \
+{ \
+    return ht_remove(ht, ht_key(item), ht_key_len(item)); \
+} \
+ \
+/* Forwards to ht_visit. */ \
+HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \
+        HT_NAME##_visitor_f *visitor, void *context) \
+{ \
+    ht_visit(ht, visitor, context); \
+} \
+
+#endif /* HASH_TABLE_DEF_H */
diff --git a/external/hash/hash_table_impl.h b/external/hash/hash_table_impl.h
new file mode 100644
index 0000000..94fc9b8
--- /dev/null
+++ b/external/hash/hash_table_impl.h
@@ -0,0 +1,233 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/*
+ * This file implements a generic hash interface such that different
+ * instances have the same name, but hidden from each other.
+ * The interface maps the local names to a public specific type.
+ *
+ * This implementation provides a hash table with linear or quadratic
+ * probing.
+ */
+
+#ifdef HASH_TABLE_IMPL
+#error "cannot have multiple implementations in same compilation unit"
+#endif
+#define HASH_TABLE_IMPL
+/* Open Addressing */
+#define HT_OA
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+#include <stdlib.h>
+#include <assert.h>
+
+#ifndef HT_PROBE
+/*
+ * Maps hash value `k` and probe step `i` to a bucket index. `N` is the
+ * bucket count minus one and is used as a bit mask. Define
+ * HT_PROBE_QUADRATIC to step by triangular numbers instead of by one.
+ * Arguments are parenthesized so that expressions can be passed safely.
+ */
+#ifdef HT_PROBE_QUADRATIC
+#define HT_PROBE(k, i, N) (((k) + ((i) + (i) * (i)) / 2) & (N))
+#else
+#define HT_PROBE(k, i, N) (((k) + (i)) & (N))
+#endif
+#endif
+
+/*
+ * Allocates a zeroed bucket array able to hold `count` items at the
+ * configured load factor. The bucket count starts at 4 and doubles
+ * until it is large enough. This implementation does not use offsets.
+ *
+ * Returns 0 on success and -1 on an invalid load factor or when the
+ * allocation fails.
+ */
+static int ht_init(hash_table_t *ht, size_t count)
+{
+    size_t capacity;
+
+    if ((HT_LOAD_FACTOR_FRAC) < 1 || (HT_LOAD_FACTOR_FRAC) > 256) {
+        /*
+         * 100% is bad but still the users choice.
+         * 101% will never terminate insertion.
+         */
+        HT_PANIC("hash table failed with impossible load factor");
+        return -1;
+    }
+    for (capacity = 4; capacity * (HT_LOAD_FACTOR_FRAC) / 256 < count; capacity *= 2) {
+        /* Keep doubling until `count` fits below the load threshold. */
+    }
+    ht->table = calloc(capacity, sizeof(ht_item_t));
+    if (!ht->table) {
+        return -1;
+    }
+    ht->count = 0;
+    ht->buckets = capacity;
+    ht->offsets = 0;
+    return 0;
+}
+
+/*
+ * Rebuilds the table with room for at least `count` items, or for the
+ * current number of items if that is larger. Live items are reinserted
+ * into a fresh table with ht_multi; empty buckets and tombstones are
+ * skipped, so tombstones do not survive a resize.
+ *
+ * Returns 0 on success. Returns -1 if the new table cannot be
+ * initialized, in which case `ht` is left untouched.
+ */
+static int ht_resize(hash_table_t *ht, size_t count)
+{
+    size_t i;
+    hash_table_t ht2;
+    ht_item_t *T = ht->table;
+    /* Use the item type itself so non-pointer item types also work. */
+    ht_item_t item;
+
+    if (count < ht->count) {
+        count = ht->count;
+    }
+    if (ht_init(&ht2, count)) {
+        return -1;
+    }
+    for (i = 0; i < ht->buckets; ++i) {
+        item = T[i];
+        if (item && item != HT_DELETED) {
+            ht_insert(&ht2, ht_key(item), ht_key_len(item), item, ht_multi);
+        }
+    }
+    ht_clear(ht);
+    /* Struct assignment copies the new state without needing <string.h>. */
+    *ht = ht2;
+    return 0;
+}
+
+/*
+ * Inserts `new_item` under the key `key`/`len` according to `mode`.
+ *
+ * The table is resized first when the item count has reached the load
+ * threshold. For ht_unique and ht_multi the item goes into the first
+ * empty or tombstone bucket on the probe sequence without comparing
+ * keys. For the other modes the probe sequence is searched for a
+ * match: a match is returned, and replaced only when mode is
+ * ht_replace. Without a match the item goes into the first tombstone
+ * seen, or else into the empty bucket that ended the search.
+ *
+ * Returns 0 when the item was inserted, the matching stored item when
+ * one was found, or HT_NOMEM when the resize failed.
+ */
+static ht_item_t ht_insert(hash_table_t *ht,
+        const void *key, size_t len, ht_item_t new_item, int mode)
+{
+    ht_item_t *slots;
+    ht_item_t *reuse = 0;
+    ht_item_t cur;
+    size_t mask, hash, step, pos;
+
+    assert(new_item != HT_MISSING);
+    assert(new_item != HT_DELETED);
+    assert(new_item != HT_NOMEM);
+
+    if (ht->count >= ht->buckets * (HT_LOAD_FACTOR_FRAC) / 256 &&
+            ht_resize(ht, ht->count * 2)) {
+        HT_PANIC("hash table failed to allocate memory during resize");
+        return HT_NOMEM;
+    }
+    slots = ht->table;
+    mask = ht->buckets - 1;
+    hash = HT_HASH_FUNCTION(key, len);
+    step = 0;
+    pos = HT_PROBE(hash, step, mask);
+
+    switch (mode) {
+    case ht_unique:
+    case ht_multi:
+        /* No key comparison: take the first bucket that is free. */
+        ++ht->count;
+        for (;;) {
+            cur = slots[pos];
+            if (!cur || cur == HT_DELETED) {
+                break;
+            }
+            ++step;
+            pos = HT_PROBE(hash, step, mask);
+        }
+        slots[pos] = new_item;
+        return 0;
+    default:
+        break;
+    }
+
+    for (cur = slots[pos]; cur; cur = slots[pos]) {
+        if (cur == HT_DELETED) {
+            /*
+             * Remember the first tombstone for reuse, but keep
+             * searching for a possible match.
+             */
+            if (!reuse) {
+                reuse = &slots[pos];
+            }
+        } else if (ht_match(key, len, cur)) {
+            if (mode == ht_replace) {
+                slots[pos] = new_item;
+            }
+            return cur;
+        }
+        ++step;
+        pos = HT_PROBE(hash, step, mask);
+    }
+    if (!reuse) {
+        reuse = &slots[pos];
+    }
+    *reuse = new_item;
+    ++ht->count;
+    return 0;
+}
+
+/*
+ * Looks up the key `key`/`len` by following its probe sequence until
+ * an empty bucket is reached. Tombstones are stepped over.
+ *
+ * Returns the first matching item, or 0 when there is no match or the
+ * table has not been allocated.
+ */
+static ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t cur;
+    size_t mask, hash, step, pos;
+
+    if (!slots) {
+        return 0;
+    }
+    mask = ht->buckets - 1;
+    hash = HT_HASH_FUNCTION(key, len);
+    for (step = 0;; ++step) {
+        pos = HT_PROBE(hash, step, mask);
+        cur = slots[pos];
+        if (!cur) {
+            return 0;
+        }
+        if (cur != HT_DELETED && ht_match(key, len, cur)) {
+            return cur;
+        }
+    }
+}
+
+/*
+ * Removes the first item on the probe sequence of `key`/`len` that
+ * matches the key. The bucket is overwritten with the HT_DELETED
+ * tombstone and the item count is decremented.
+ *
+ * Returns the removed item, or 0 when there is no match or the table
+ * has not been allocated.
+ */
+static ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t cur;
+    size_t mask, hash, step, pos;
+
+    if (!slots) {
+        return 0;
+    }
+    mask = ht->buckets - 1;
+    hash = HT_HASH_FUNCTION(key, len);
+    for (step = 0;; ++step) {
+        pos = HT_PROBE(hash, step, mask);
+        cur = slots[pos];
+        if (!cur) {
+            return 0;
+        }
+        if (cur != HT_DELETED && ht_match(key, len, cur)) {
+            slots[pos] = HT_DELETED;
+            --ht->count;
+            return cur;
+        }
+    }
+}
+
+/*
+ * Calls `visitor(context, item)` for every stored item, walking the
+ * bucket array in index order and skipping empty buckets and
+ * tombstones.
+ */
+static void ht_visit(hash_table_t *ht, ht_visitor_f *visitor, void *context)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t cur;
+    size_t i = 0;
+
+    while (i < ht->buckets) {
+        cur = slots[i++];
+        if (!cur || cur == HT_DELETED) {
+            continue;
+        }
+        visitor(context, cur);
+    }
+}
+
+/*
+ * Frees the bucket array and zeroes the table struct. Stored items are
+ * not touched. Safe to call on a zeroed table because free accepts a
+ * null pointer.
+ */
+static void ht_clear(hash_table_t *ht)
+{
+    free(ht->table);
+    memset(ht, 0, sizeof(*ht));
+}
diff --git a/external/hash/hash_table_impl_rh.h b/external/hash/hash_table_impl_rh.h
new file mode 100644
index 0000000..b4cabae
--- /dev/null
+++ b/external/hash/hash_table_impl_rh.h
@@ -0,0 +1,360 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* We use the same define for all implementations */
+#ifdef HASH_TABLE_IMPL
+#error "cannot have multiple implementations in same compilation unit"
+#endif
+#define HASH_TABLE_IMPL
+/* Robin Hood (with offset table) */
+#define HT_RH
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+#include <stdlib.h>
+#include <assert.h>
+
+/*
+ * A variation of Robin Hood Hashing:
+ * We do not calculate distance from buckets, nor do we cache
+ * hash keys. Instead we maintain a 7-bit offset that points
+ * to where the first entry of a bucket is stored. In Robin Hood hashing
+ * all entries conceptually chained to the same bucket are stored
+ * immediately after each other in order of insertion. The offset of
+ * the next bucket is naturally the end of the previous bucket, off by
+ * one. This breaks down when the bucket offset is 0 and the bucket is
+ * empty because it suggests there is an element. We cannot distinguish
+ * between a single used and unused entry, except by looking at the
+ * content or otherwise tag the information on. This is not a problem,
+ * just a special case to deal with.
+ *
+ * The offsets are stored separately which might lead to more cache line
+ * traffic, but the alternative is not very elegant - either wasting
+ * space or trying to pack offsets on a per cache line basis. We only
+ * need 8 bits for offsets. If the offset overflows, bit 7 will be set
+ * which we can easily detect. During insertion, offsets are incremented
+ * on all affected buckets, and likewise decremented on remove. In
+ * principle we can use bit parallel increments to update most offsets
+ * in a single operation, but it is hardly worthwhile due to setup
+ * cost. The approach bears some resemblance to hopscotch hashing which
+ * uses local offsets for chaining, but we prefer the simpler Robin
+ * Hood approach.
+ *
+ * If the offset overflows, the table is resized. We expect the packed
+ * chains to behave like a special case of a hopscotch layout and
+ * consequently have the same bounds, meaning we are unlikely to have
+ * either long offsets or long chains if we resize before the table gets
+ * very full, so resizing on an offset of 128 should be ok.
+ *
+ * Our main motivation for this hashing is actually to get rid of
+ * tombstones in quadratic and linear probing. Avoiding tombstones
+ * is much simpler when sorting chains Robin Hood style, and we avoid
+ * checking for tombstones. We lose this benefit by having to inspect
+ * offsets, but then also avoid checking keys before the chain, and
+ * after because we can zero in on exactly the entries belonging to
+ * bucket.
+ *
+ * Unlike traditional Robin Hood, we can find a missing key very quickly
+ * without any heuristics: we only need to inspect exactly the number
+ * of entries in the bucket (or at most 1 if the bucket is empty).
+ *
+ * Find operations start exactly at an entry with a matching hash key
+ * unlike normal Robin Hood which must scan past a few earlier entries
+ * on average, or guestimate where to start and seek both ways.
+ *
+ * We can also very quickly insert a key that is known to be unique
+ * because we can add it directly to the end (but possibly requiring
+ * a shift of later entries Robin Hood style).
+ *
+ * Whether these benefits outweighs the cost of a separate offset
+ * lookup is unclear, but the reduced memory consumption certainly
+ * allows for a lower load factor, which also helps a lot.
+ *
+ * Traditional Robin Hood Hashing actually permits a chain to become
+ * very long. We do not permit this, in line with hopscotch hashing.
+ * This is a drawback from a security perspective because worst case
+ * this can trigger resizing ad infinitum iff the hash function can
+ * be hacked or massive duplicate key insertion can be triggered. By
+ * using the provided hash functions and seeding them randomly at
+ * startup, and avoiding the multi key feature, it is very unlikely to
+ * be a problem with what is known about hash table attacks so far.
+ *
+ * Values and keys are not stored, only item pointers. Custom macroes
+ * or inline functions provide access to key data from the item. We
+ * could add a separate value array and treat the item strictly as a
+ * key, but we can have a smaller load factor instead, and can more
+ * easily avoid copying complex key structures, such as start end
+ * pointers to token data for parser.
+ *
+ * A typical hash table has: key pointer or key value, value pointer
+ * or value, a cached hash key or bitmap (for Robin Hood or Hopscotch)
+ * which on 64 bit platforms easily amounts to 20 bytes or more per
+ * bucket. We use 9 bytes on 64 bit platforms and 5 bytes on 32 bit.
+ * This gets us down to a max load of 0.5 and on average about 0.37.
+ * This should make it very likely that the first bucket inspected is
+ * a direct hit negating the benefit of caching hash keys. In addition,
+ * when it is not a direct hit, we get pointers loaded in a cache line
+ * to inspect, all known to have the same hash key.
+ */
+
+/*
+ * Allocates zeroed item and offset arrays able to hold `count` items
+ * at the configured load factor. The bucket count starts at 4 and
+ * doubles until it is large enough.
+ *
+ * Returns 0 on success and -1 on an invalid load factor or when an
+ * allocation fails. If only the offset allocation fails, the item
+ * array is freed again.
+ */
+int ht_init(hash_table_t *ht, size_t count)
+{
+    size_t capacity = 4;
+
+    if ((HT_LOAD_FACTOR_FRAC) < 1 || (HT_LOAD_FACTOR_FRAC) > 256) {
+        /*
+         * 101% will never terminate insertion.
+         * 0% will never terminate resize.
+         */
+        HT_PANIC("robin hood hash table failed with impossible load factor");
+        return -1;
+    }
+    while (capacity * (HT_LOAD_FACTOR_FRAC) / 256 < count) {
+        capacity *= 2;
+    }
+    ht->table = calloc(capacity, sizeof(ht_item_t));
+    if (ht->table != 0) {
+        ht->offsets = calloc(capacity, sizeof(char));
+        if (ht->offsets != 0) {
+            ht->buckets = capacity;
+            ht->count = 0;
+            return 0;
+        }
+        free(ht->table);
+        ht->table = 0;
+    }
+    return -1;
+}
+
+/*
+ * Rebuilds the table with room for at least `count` items, or for the
+ * current number of items if that is larger. Every stored item with a
+ * value above 1 is reinserted into a fresh table with ht_multi.
+ *
+ * Returns 0 on success. Returns -1 if the new table cannot be
+ * initialized, in which case `ht` is left untouched.
+ */
+int ht_resize(hash_table_t *ht, size_t count)
+{
+    size_t i;
+    hash_table_t ht2;
+    ht_item_t *T = ht->table;
+    ht_item_t item;
+
+    if (count < ht->count) {
+        count = ht->count;
+    }
+    if (ht_init(&ht2, count)) {
+        return -1;
+    }
+    for (i = 0; i < ht->buckets; ++i) {
+        item = T[i];
+        if (item > (ht_item_t)1) {
+            ht_insert(&ht2, ht_key(item), ht_key_len(item), item, ht_multi);
+        }
+    }
+    ht_clear(ht);
+    /* Struct assignment copies the new state without needing <string.h>. */
+    *ht = ht2;
+    return 0;
+}
+
+/*
+ * Inserts `item` under the key `key`/`len` according to `mode`.
+ *
+ * The table is resized first when the item count has reached the load
+ * threshold. Unless mode is ht_multi, the entries of the key's bucket
+ * are compared against the key: a match is returned, and replaced only
+ * when mode is ht_replace. Otherwise the item is placed at the end of
+ * its bucket, later entries are shifted one position up, and the
+ * offsets of all affected buckets are incremented.
+ *
+ * If any incremented offset reaches bit 7 the table is resized. The
+ * check is done on an unsigned accumulator so that it also works on
+ * platforms where plain char is unsigned.
+ *
+ * Returns 0 when the item was inserted, the matching stored item when
+ * one was found, or HT_NOMEM when a resize failed or was refused.
+ */
+ht_item_t ht_insert(hash_table_t *ht,
+        const void *key, size_t len, ht_item_t item, int mode)
+{
+    ht_item_t *T;
+    size_t N, n, j, k, offset;
+    ht_item_t new_item;
+    /* Collects the bits of every offset that was incremented. */
+    unsigned char overflow = 0;
+
+    new_item = item;
+    if (ht->count >= ht->buckets * (HT_LOAD_FACTOR_FRAC) / 256) {
+        if (ht_resize(ht, ht->count * 2)) {
+            HT_PANIC("robin hood hash table failed to allocate memory during resize");
+            return HT_NOMEM;
+        }
+    }
+    T = ht->table;
+    N = ht->buckets - 1;
+    k = HT_HASH_FUNCTION(key, len) & N;
+    offset = ht->offsets[k];
+    j = (k + offset) & N;
+    /*
+     * T[j] == 0 is a special case because we cannot count
+     * zero probe length, and because we should not increment
+     * the offset at insertion point in this case.
+     *
+     * T[j] == 0 implies offset == 0, but this way we avoid
+     * hitting memory that we don't need.
+     */
+    if (offset == 0 && T[j] == 0) {
+        ++ht->count;
+        T[j] = new_item;
+        return 0;
+    }
+    /* Number of entries currently stored in bucket k. */
+    n = ht->offsets[(k + 1) & N] - offset + 1;
+    if (mode == ht_multi) {
+        /* Don't search for match before inserting. */
+        j = (j + n) & N;
+        n = 0;
+    }
+    while (n--) {
+        item = T[j];
+        if (ht_match(key, len, item)) {
+            if (mode == ht_replace) {
+                T[j] = new_item;
+            }
+            return item;
+        }
+        j = (j + 1) & N;
+    }
+    ++ht->count;
+    while (k != j) {
+        /* Only increment buckets after own bucket. */
+        k = (k + 1) & N;
+        overflow |= (unsigned char)++ht->offsets[k];
+    }
+    /* Shift later entries up by one until an empty bucket absorbs the last. */
+    while ((item = T[j])) {
+        T[j] = new_item;
+        new_item = item;
+        j = (j + 1) & N;
+        overflow |= (unsigned char)++ht->offsets[j];
+    }
+    T[j] = new_item;
+
+    if (overflow & 0x80) {
+        /*
+         * At least one offset overflowed, so we need to
+         * resize the table.
+         */
+        if (ht->count * 10 < ht->buckets) {
+            HT_PANIC("FATAL: hash table resize on low utilization would explode\n"
+                    " possible collision DoS or bad hash function");
+            return HT_NOMEM;
+        }
+        if (ht_resize(ht, ht->count * 2)) {
+            HT_PANIC("FATAL: hash table resize failed and left hash table inconsistent");
+            /*
+             * This renders the hash table in a bad state
+             * because we have updated to an inconsistent
+             * state.
+             */
+            return HT_NOMEM;
+        }
+    }
+    /* The shift loop above ends with item == 0. */
+    return item;
+}
+
+/*
+ * Looks up the key `key`/`len`. Only the entries belonging to the
+ * key's bucket are compared; their position and number are derived
+ * from the offset of this bucket and of the following one.
+ *
+ * Returns the first matching item, or 0 when there is no match or the
+ * table has not been allocated.
+ */
+ht_item_t ht_find(hash_table_t *ht, const void *key, size_t len)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t candidate;
+    size_t mask, bucket, first, pos, remaining;
+
+    if (slots == 0) {
+        return 0;
+    }
+    mask = ht->buckets - 1;
+    bucket = HT_HASH_FUNCTION(key, len) & mask;
+    first = ht->offsets[bucket];
+    pos = (bucket + first) & mask;
+    if (first == 0 && slots[pos] == 0) {
+        /* Special case because we cannot count zero probe length. */
+        return 0;
+    }
+    remaining = ht->offsets[(bucket + 1) & mask] - first + 1;
+    for (; remaining != 0; --remaining) {
+        candidate = slots[pos];
+        if (ht_match(key, len, candidate)) {
+            return candidate;
+        }
+        pos = (pos + 1) & mask;
+    }
+    return 0;
+}
+
+/*
+ * Removes the first entry in the bucket of `key`/`len` that matches
+ * the key. The offsets between the bucket and the removed position
+ * are decremented, and following entries are moved one position down
+ * until a bucket with offset 0 is reached, so no tombstone is left.
+ *
+ * Returns the removed item, or 0 when there is no match or the table
+ * has not been allocated.
+ */
+ht_item_t ht_remove(hash_table_t *ht, const void *key, size_t len)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t found;
+    size_t mask, bucket, first, pos, remaining, hole, next;
+
+    if (slots == 0) {
+        return 0;
+    }
+    mask = ht->buckets - 1;
+    bucket = HT_HASH_FUNCTION(key, len) & mask;
+    first = ht->offsets[bucket];
+    pos = (bucket + first) & mask;
+    if (first == 0 && slots[pos] == 0) {
+        return 0;
+    }
+    remaining = ht->offsets[(bucket + 1) & mask] - first + 1;
+    for (;;) {
+        if (remaining == 0) {
+            return 0;
+        }
+        found = slots[pos];
+        if (ht_match(key, len, found)) {
+            break;
+        }
+        pos = (pos + 1) & mask;
+        --remaining;
+    }
+    --ht->count;
+    while (bucket != pos) {
+        /* Do not update the offset of the bucket that we own. */
+        bucket = (bucket + 1) & mask;
+        --ht->offsets[bucket];
+    }
+    /* Close the gap by pulling displaced entries one position down. */
+    hole = pos;
+    next = (hole + 1) & mask;
+    while (ht->offsets[next] != 0) {
+        --ht->offsets[next];
+        slots[hole] = slots[next];
+        hole = next;
+        next = (next + 1) & mask;
+    }
+    slots[hole] = 0;
+    return found;
+}
+
+/*
+ * Calls `visitor(context, item)` for every bucket whose item value is
+ * above 1, walking the bucket array in index order.
+ */
+void ht_visit(hash_table_t *ht, ht_visitor_f *visitor, void *context)
+{
+    ht_item_t *slots = ht->table;
+    ht_item_t cur;
+    size_t i = 0;
+
+    while (i < ht->buckets) {
+        cur = slots[i++];
+        if (cur > (ht_item_t)1) {
+            visitor(context, cur);
+        }
+    }
+}
+
+/*
+ * Frees the item and offset arrays and zeroes the table struct.
+ * Stored items are not touched. Safe to call on a zeroed table
+ * because free accepts a null pointer.
+ */
+void ht_clear(hash_table_t *ht)
+{
+    free(ht->table);
+    free(ht->offsets);
+    memset(ht, 0, sizeof(*ht));
+}
diff --git a/external/hash/hash_test.c b/external/hash/hash_test.c
new file mode 100644
index 0000000..d54cc07
--- /dev/null
+++ b/external/hash/hash_test.c
@@ -0,0 +1,419 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* Not used here, just included to catch compiler errors and warnings. */
+#include "hash.h"
+
+#include "str_set.h"
+#include "token_map.h"
+#include "ht64.h"
+#include "ht32.h"
+#include "ht64rh.h"
+#include "ht32rh.h"
+
+#include "ht_trace.h"
+
+#define test_assert(x) if (!(x)) { printf("Test failed at %s:%d\n", __FILE__, __LINE__); assert(0); exit(1); }
+
+
+/* String set shared by the test_str_set* tests; starts out zero initialized. */
+str_set_t S;
+/* Token map used by test_token_map. */
+token_map_t TM;
+
+/* Sample keys inserted by the string set and token map tests. */
+char *keys[] = {
+ "foo",
+ "bar",
+ "baz",
+ "gimli",
+ "bofur"
+};
+
+/* One token per entry in keys; filled in by test_token_map. */
+struct token tokens[5];
+
+/* Destroy callback for the string set: releases a heap allocated key. */
+void free_key(void *context, char *key) {
+    (void)context;
+    free(key);
+}
+
+/*
+ * Exercises insert, find, remove and destroy on the shared string set S
+ * using explicit key lengths and heap allocated copies of the keys.
+ */
+void test_str_set()
+{
+ int i;
+ char *s, *s0, *s1;
+ unsigned int n = sizeof(keys)/sizeof(keys[0]);
+
+ /* We rely on zero initialization here. */
+ test_assert(str_set_count(&S) == 0);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ /* We don't have to use strdup, but we test the
+ * allocation management and item replacement. */
+ s = str_set_insert(&S, s, strlen(s), strdup(s), ht_keep);
+ test_assert(str_set_count(&S) == i + 1);
+ test_assert(s == 0);
+ }
+ test_assert(n == 5);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s = str_set_find(&S, s, strlen(s));
+ test_assert(strcmp(s, keys[i]) == 0);
+ }
+ /* Only the first 5 characters ("gimli") are used as the key. */
+ s = str_set_remove(&S, "gimlibofur", 5);
+ test_assert(strcmp(s, "gimli") == 0);
+ free(s);
+ test_assert(str_set_count(&S) == n - 1);
+ /* A second removal of the same key must report nothing. */
+ s = str_set_remove(&S, "gimlibofur", 5);
+ test_assert(s == 0);
+ test_assert(str_set_count(&S) == n - 1);
+ /* Insert the new key "foobar" (first 6 characters). */
+ s = str_set_insert(&S, "foobarbaz", 6,
+ (s0 = strndup("foobarbaz", 6)), ht_keep);
+ test_assert(s == 0);
+ test_assert(str_set_count(&S) == n);
+ /* With ht_keep the existing item s0 is returned and stays in the set. */
+ s = str_set_insert(&S, "foobarbaz", 6,
+ (s1 = strndup("foobarbaz", 6)), ht_keep);
+ test_assert(s == s0);
+ free(s1);
+ test_assert(str_set_count(&S) == n);
+ s = str_set_find(&S, "foobar", 6);
+ test_assert(s == s0);
+ /* With ht_replace the old item s0 is returned and s1 takes its place. */
+ s = str_set_insert(&S, "foobarbaz", 6,
+ (s1 = strndup("foobarbaz", 6)), ht_replace);
+ test_assert(s == s0);
+ free(s);
+ s = str_set_find(&S, "foobar", 6);
+ test_assert(s == s1);
+ s = str_set_find(&S, "foobarbaz", 9);
+ test_assert(s == 0);
+ /* free_key is passed as the destroy callback for the remaining keys. */
+ str_set_destroy(&S, free_key, 0);
+ s = str_set_find(&S, "foobar", 6);
+ test_assert(s == 0);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s = str_set_find(&S, s, strlen(s));
+ test_assert(s == 0);
+ }
+}
+
+/*
+ * Inserts every key twice (ht_unique, then ht_multi) and checks that the
+ * duplicates are counted, found, and removed one copy at a time.
+ */
+void test_str_set2()
+{
+ int i;
+ char *s, *s1;
+ unsigned int n = sizeof(keys)/sizeof(keys[0]);
+
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ str_set_insert(&S, s, strlen(s), s, ht_unique);
+ }
+ test_assert(str_set_count(&S) == n);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ /*
+ * Unique and multi are the same logically, but different
+ * intentionally.
+ */
+ str_set_insert(&S, s, strlen(s), s, ht_multi);
+ }
+ test_assert(str_set_count(&S) == 2 * n);
+ ht_trace_buckets(&S, "after double insert", 0, 8);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s1 = str_set_find(&S, s, strlen(s));
+ test_assert(strcmp(s, s1) == 0);
+ }
+ /* First removal pass: one copy of every key is still left afterwards. */
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s1 = str_set_remove(&S, s, strlen(s));
+ test_assert(strcmp(s, s1) == 0);
+ test_assert(str_set_count(&S) == 2 * n - i - 1);
+ ht_trace_buckets(&S, "after single", 8, 8);
+ }
+ ht_trace_buckets(&S, "after first remove", 0, 8);
+ /* Second removal pass: removes the remaining copies. */
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s1 = str_set_remove(&S, s, strlen(s));
+ test_assert(strcmp(s, s1) == 0);
+ test_assert(str_set_count(&S) == n - i - 1);
+ }
+ ht_trace_buckets(&S, "efter second remove", 0, 8);
+ /* Third removal pass: nothing is left to remove. */
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s1 = str_set_remove(&S, s, strlen(s));
+ test_assert(s1 == 0);
+ test_assert(str_set_count(&S) == 0);
+ }
+ str_set_clear(&S);
+}
+
+/*
+ * Exercises the *_item variants of insert, find and remove, which take
+ * the item itself instead of a separate key and length.
+ */
+void test_str_set3()
+{
+ int i;
+ char *s, *s1;
+ unsigned int n = sizeof(keys)/sizeof(keys[0]);
+
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ str_set_insert_item(&S, s, ht_unique);
+ }
+ test_assert(str_set_count(&S) == n);
+ /* Re-inserting with ht_keep must not change the item count. */
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ str_set_insert_item(&S, s, ht_keep);
+ }
+ test_assert(str_set_count(&S) == n);
+ for (i = 0; i < n; ++i) {
+ s = keys[i];
+ s1 = str_set_find_item(&S, s);
+ test_assert(strcmp(s, s1) == 0);
+ }
+ s = keys[1];
+ s1 = str_set_remove_item(&S, s);
+ /*
+ * This doesn't always hold, but here we
+ * are sure because of how we inserted data.
+ */
+ test_assert(s == s1);
+ s1 = str_set_find_item(&S, s);
+ test_assert(s1 == 0);
+ str_set_clear(&S);
+}
+
+/*
+ * Checks ht_keep versus ht_replace when an equal string stored at a
+ * different address ("2dumble" + 1) is inserted over "dumble".
+ */
+void test_str_set4()
+{
+ char *s, *s1;
+
+ s = "dumble";
+ str_set_insert_item(&S, "dumble", ht_keep);
+ s1 = str_set_find_item(&S, s);
+ /* Insert without replace. */
+ str_set_insert_item(&S, "2dumble" + 1, ht_keep);
+ test_assert(s == s1);
+ s1 = str_set_find_item(&S, s);
+ test_assert(s == s1);
+ /* Insert with replace. */
+ s1 = str_set_insert_item(&S, "2dumble" + 1, ht_replace);
+ /* Old value still returned. */
+ test_assert(s == s1);
+ s1 = str_set_find_item(&S, s);
+ test_assert(s != s1);
+ /* New item returned. */
+ test_assert(strcmp(s1 - 1, "2dumble") == 0);
+ str_set_clear(&S);
+}
+
+/* Visitor callback: counts visited items in the int that `context` points to. */
+void visit_item_set(void *context, token_map_item_t item)
+{
+    int *visited = context;
+
+    (void)item;
+    *visited += 1;
+}
+
+/*
+ * Fills the token map TM with one token per key, then checks visiting
+ * and both lookup variants (by key and length, and by item).
+ */
+void test_token_map()
+{
+ int i, count;
+ token_map_item_t item;
+ unsigned int n = sizeof(keys)/sizeof(keys[0]);
+
+ /* item[0] only appears inside sizeof, so item is not dereferenced here. */
+ test_assert(sizeof(tokens)/sizeof(item[0]) == n);
+
+ for (i = 0; i < n; ++i) {
+ tokens[i].token = keys[i];
+ tokens[i].len = strlen(keys[i]);
+ }
+ for (i = 0; i < n; ++i) {
+ item = &tokens[i];
+ token_map_insert(&TM, item->token, item->len, item, ht_unique);
+ }
+ /* The visitor increments count once per visited item. */
+ count = 0;
+ token_map_visit(&TM, visit_item_set, &count);
+ test_assert(count == n);
+
+ /* The type field is advanced 0 -> 1 -> 2 to prove each lookup hit the same token. */
+ for (i = 0; i < n; ++i) {
+ item = token_map_find(&TM, keys[i], strlen(keys[i]));
+ test_assert(item->type == 0);
+ item->type = 1;
+ }
+ for (i = 0; i < n; ++i) {
+ item = token_map_find_item(&TM, &tokens[i]);
+ test_assert(item->type == 1);
+ item->type = 2;
+ }
+}
+
+/*
+ * Exercises the ht32 table with 100 distinct keys: every key is inserted
+ * and found, then the keys are removed one at a time while checking that
+ * removed keys are missing and the remaining keys are still found.
+ */
+void test_ht32()
+{
+    enum { key_count = 100, key_base = 3398 };
+    uint32_t values[key_count];
+    ht32_t ht;
+    uint32_t *hit, *gone;
+    int i, j;
+
+    ht32_init(&ht, 10);
+    for (i = 0; i < key_count; ++i) {
+        values[i] = i + key_base;
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht32_insert_item(&ht, &values[i], ht_unique);
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht32_find_item(&ht, &values[i]);
+        test_assert(hit != 0);
+        test_assert(*hit == i + key_base);
+    }
+    for (i = 0; i < key_count; ++i) {
+        gone = ht32_remove_item(&ht, &values[i]);
+        test_assert(gone != ht32_missing);
+        for (j = 0; j < key_count; ++j) {
+            hit = ht32_find_item(&ht, &values[j]);
+            if (j <= i) {
+                /* Keys up to and including i have been removed. */
+                test_assert(hit == ht32_missing);
+                continue;
+            }
+            test_assert(hit != ht32_missing);
+            test_assert(*hit == j + key_base);
+        }
+    }
+    ht32_clear(&ht);
+}
+
+/*
+ * Exercises the ht64 table with 100 distinct keys: every key is inserted
+ * and found, then the keys are removed one at a time while checking that
+ * removed keys are missing and the remaining keys are still found.
+ */
+void test_ht64()
+{
+    enum { key_count = 100, key_base = 3398 };
+    uint64_t values[key_count];
+    ht64_t ht;
+    uint64_t *hit, *gone;
+    int i, j;
+
+    ht64_init(&ht, 10);
+    for (i = 0; i < key_count; ++i) {
+        values[i] = i + key_base;
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht64_insert_item(&ht, &values[i], ht_unique);
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht64_find_item(&ht, &values[i]);
+        test_assert(hit != 0);
+        test_assert(*hit == i + key_base);
+    }
+    for (i = 0; i < key_count; ++i) {
+        gone = ht64_remove_item(&ht, &values[i]);
+        test_assert(gone != ht64_missing);
+        for (j = 0; j < key_count; ++j) {
+            hit = ht64_find_item(&ht, &values[j]);
+            if (j <= i) {
+                /* Keys up to and including i have been removed. */
+                test_assert(hit == ht64_missing);
+                continue;
+            }
+            test_assert(hit != ht64_missing);
+            test_assert(*hit == j + key_base);
+        }
+    }
+    ht64_clear(&ht);
+}
+
+/*
+ * Exercises the ht32rh table with 100 distinct keys: every key is inserted
+ * and found, then the keys are removed one at a time while checking that
+ * removed keys are missing and the remaining keys are still found.
+ */
+void test_ht32rh()
+{
+    enum { key_count = 100, key_base = 3398 };
+    uint32_t values[key_count];
+    ht32rh_t ht;
+    uint32_t *hit, *gone;
+    int i, j;
+
+    ht32rh_init(&ht, 10);
+    for (i = 0; i < key_count; ++i) {
+        values[i] = i + key_base;
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht32rh_insert_item(&ht, &values[i], ht_unique);
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht32rh_find_item(&ht, &values[i]);
+        test_assert(hit != 0);
+        test_assert(*hit == i + key_base);
+    }
+    for (i = 0; i < key_count; ++i) {
+        gone = ht32rh_remove_item(&ht, &values[i]);
+        test_assert(gone != ht32rh_missing);
+        for (j = 0; j < key_count; ++j) {
+            hit = ht32rh_find_item(&ht, &values[j]);
+            if (j <= i) {
+                /* Keys up to and including i have been removed. */
+                test_assert(hit == ht32rh_missing);
+                continue;
+            }
+            test_assert(hit != ht32rh_missing);
+            test_assert(*hit == j + key_base);
+        }
+    }
+    ht32rh_clear(&ht);
+}
+
+/*
+ * Exercises the ht64rh table with 100 distinct keys: every key is inserted
+ * and found, then the keys are removed one at a time while checking that
+ * removed keys are missing and the remaining keys are still found.
+ */
+void test_ht64rh()
+{
+    enum { key_count = 100, key_base = 3398 };
+    uint64_t values[key_count];
+    ht64rh_t ht;
+    uint64_t *hit, *gone;
+    int i, j;
+
+    ht64rh_init(&ht, 10);
+    for (i = 0; i < key_count; ++i) {
+        values[i] = i + key_base;
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht64rh_insert_item(&ht, &values[i], ht_unique);
+    }
+    for (i = 0; i < key_count; ++i) {
+        hit = ht64rh_find_item(&ht, &values[i]);
+        test_assert(hit != 0);
+        test_assert(*hit == i + key_base);
+    }
+    for (i = 0; i < key_count; ++i) {
+        gone = ht64rh_remove_item(&ht, &values[i]);
+        test_assert(gone != ht64rh_missing);
+        for (j = 0; j < key_count; ++j) {
+            hit = ht64rh_find_item(&ht, &values[j]);
+            if (j <= i) {
+                /* Keys up to and including i have been removed. */
+                test_assert(hit == ht64rh_missing);
+                continue;
+            }
+            test_assert(hit != ht64rh_missing);
+            test_assert(*hit == j + key_base);
+        }
+    }
+    ht64rh_clear(&ht);
+}
+
+/* Runs every test in order; a failing test_assert exits before the summary. */
+int main(int argc, char *argv[])
+{
+    void (*const suite[])(void) = {
+        test_str_set,
+        test_str_set2,
+        test_str_set3,
+        test_str_set4,
+        test_token_map,
+        test_ht32,
+        test_ht64,
+        test_ht32rh,
+        test_ht64rh
+    };
+    size_t t;
+
+    (void)argc;
+    (void)argv;
+
+    for (t = 0; t < sizeof(suite) / sizeof(suite[0]); ++t) {
+        suite[t]();
+    }
+
+    printf("all tests passed\n");
+
+    return 0;
+}
diff --git a/external/hash/ht32.c b/external/hash/ht32.c
new file mode 100644
index 0000000..9954bde
--- /dev/null
+++ b/external/hash/ht32.c
@@ -0,0 +1,47 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ht32.h"
+#define HT_HASH_FUNCTION ht_uint32_hash_function
+
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(ht32)
+
+#include "hash_table_impl.h"
+
+
+/* Two items match when the integers they point to are equal; len is unused. */
+static inline int ht_match(const void *key, size_t len, const ht32_item_t item)
+{
+    const ht32_item_t probe = (const ht32_item_t)key;
+
+    (void)len;
+    return *probe == *item;
+}
+
+/* The item pointer itself is the key pointer. */
+static inline const void *ht_key(const ht32_item_t item)
+{
+    const void *key = item;
+
+    return key;
+}
+
+/* The key length is the size of the pointed-to integer. */
+static inline size_t ht_key_len(const ht32_item_t item)
+{
+    return sizeof(item[0]);
+}
diff --git a/external/hash/ht32.h b/external/hash/ht32.h
new file mode 100644
index 0000000..dab9ffb
--- /dev/null
+++ b/external/hash/ht32.h
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HT32_H
+#define HT32_H
+
+#ifndef UINT8_MAX
+#include <stdint.h>
+#endif
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(ht32, uint32_t *)
+
+#endif /* HT32_H */
diff --git a/external/hash/ht32rh.c b/external/hash/ht32rh.c
new file mode 100644
index 0000000..de6dae2
--- /dev/null
+++ b/external/hash/ht32rh.c
@@ -0,0 +1,47 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ht32rh.h"
+#define HT_HASH_FUNCTION ht_uint32_hash_function
+
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(ht32rh)
+
+#include "hash_table_impl_rh.h"
+
+
+/* Two items match when the integers they point to are equal; len is unused. */
+static inline int ht_match(const void *key, size_t len, const ht32rh_item_t item)
+{
+    const ht32rh_item_t probe = (const ht32rh_item_t)key;
+
+    (void)len;
+    return *probe == *item;
+}
+
+/* The item pointer itself is the key pointer. */
+static inline const void *ht_key(const ht32rh_item_t item)
+{
+    const void *key = item;
+
+    return key;
+}
+
+/* The key length is the size of the pointed-to integer. */
+static inline size_t ht_key_len(const ht32rh_item_t item)
+{
+    return sizeof(item[0]);
+}
diff --git a/external/hash/ht32rh.h b/external/hash/ht32rh.h
new file mode 100644
index 0000000..061328e
--- /dev/null
+++ b/external/hash/ht32rh.h
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HT32RH_H
+#define HT32RH_H
+
+#ifndef UINT8_MAX
+#include <stdint.h>
+#endif
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(ht32rh, uint32_t *)
+
+#endif /* HT32RH_H */
diff --git a/external/hash/ht64.c b/external/hash/ht64.c
new file mode 100644
index 0000000..eaebbc5
--- /dev/null
+++ b/external/hash/ht64.c
@@ -0,0 +1,47 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ht64.h"
+#define HT_HASH_FUNCTION ht_uint64_hash_function
+
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(ht64)
+
+#include "hash_table_impl.h"
+
+
+/* Two items match when the integers they point to are equal; len is unused. */
+static inline int ht_match(const void *key, size_t len, const ht64_item_t item)
+{
+    const ht64_item_t probe = (const ht64_item_t)key;
+
+    (void)len;
+    return *probe == *item;
+}
+
+/* The item pointer itself is the key pointer. */
+static inline const void *ht_key(const ht64_item_t item)
+{
+    const void *key = item;
+
+    return key;
+}
+
+/* The key length is the size of the pointed-to integer. */
+static inline size_t ht_key_len(const ht64_item_t item)
+{
+    return sizeof(item[0]);
+}
diff --git a/external/hash/ht64.h b/external/hash/ht64.h
new file mode 100644
index 0000000..b9f9fbe
--- /dev/null
+++ b/external/hash/ht64.h
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HT64_H
+#define HT64_H
+
+#ifndef UINT8_MAX
+#include <stdint.h>
+#endif
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(ht64, uint64_t *)
+
+#endif /* HT64_H */
diff --git a/external/hash/ht64rh.c b/external/hash/ht64rh.c
new file mode 100644
index 0000000..bfde550
--- /dev/null
+++ b/external/hash/ht64rh.c
@@ -0,0 +1,47 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ht64rh.h"
+#define HT_HASH_FUNCTION ht_uint64_hash_function
+
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(ht64rh)
+
+#include "hash_table_impl_rh.h"
+
+
+/* Two items match when the integers they point to are equal; len is unused. */
+static inline int ht_match(const void *key, size_t len, const ht64rh_item_t item)
+{
+    const ht64rh_item_t probe = (const ht64rh_item_t)key;
+
+    (void)len;
+    return *probe == *item;
+}
+
+/* The item pointer itself is the key pointer. */
+static inline const void *ht_key(const ht64rh_item_t item)
+{
+    const void *key = item;
+
+    return key;
+}
+
+/* The key length is the size of the pointed-to integer. */
+static inline size_t ht_key_len(const ht64rh_item_t item)
+{
+    return sizeof(item[0]);
+}
diff --git a/external/hash/ht64rh.h b/external/hash/ht64rh.h
new file mode 100644
index 0000000..5b3d454
--- /dev/null
+++ b/external/hash/ht64rh.h
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HT64RH_H
+#define HT64RH_H
+
+#ifndef UINT8_MAX
+#include <stdint.h>
+#endif
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(ht64rh, uint64_t *)
+
+#endif /* HT64RH_H */
diff --git a/external/hash/ht_hash_function.h b/external/hash/ht_hash_function.h
new file mode 100644
index 0000000..1f65ee5
--- /dev/null
+++ b/external/hash/ht_hash_function.h
@@ -0,0 +1,258 @@
+#ifndef HT_HASH_FUNCTION_H
+#define HT_HASH_FUNCTION_H
+
+#include <stddef.h>
+
+#ifdef _MSC_VER
+/* `inline` only advisory anyway. */
+#pragma warning(disable: 4710) /* function not inlined */
+#endif
+
+/* Avoid 0 special case in hash functions and allow for configuration with unguessable seed. */
+#ifndef HT_HASH_SEED
+#define HT_HASH_SEED UINT32_C(0x2f693b52)
+#endif
+
+#ifndef HT_HASH_32
+
+#include "cmetrohash.h"
+
+/*
+ * Default hash: runs cmetrohash64_1 over the key bytes with HT_HASH_SEED.
+ *
+ * NOTE(review): the 64-bit digest is narrowed through unsigned int before
+ * it is returned as size_t, so only its low bits are kept - confirm that
+ * this is intended.
+ */
+static inline size_t ht_default_hash_function(const void *key, size_t len)
+{
+    const uint8_t *bytes = (const uint8_t *)key;
+    uint64_t digest;
+
+    cmetrohash64_1(bytes, len, HT_HASH_SEED, (uint8_t *)&digest);
+    return (unsigned int)digest;
+}
+
+/*
+ * Hashes the pointer value itself (not the memory it points to); len is
+ * ignored. Uses the MurmurHash3 64-bit finalizer on the seeded pointer.
+ */
+static inline size_t ht_ptr_hash_function(const void *key, size_t len)
+{
+    uint64_t h = ((uint64_t)(size_t)key) ^ (HT_HASH_SEED);
+
+    (void)len;
+
+    h = (h ^ (h >> 33)) * 0xff51afd7ed558ccdULL;
+    h = (h ^ (h >> 33)) * 0xc4ceb9fe1a85ec53ULL;
+    return (size_t)(h ^ (h >> 33));
+}
+
+#else
+
+#include "PMurHash.h"
+
+/* Default hash for the HT_HASH_32 build: PMurHash32 seeded with HT_HASH_SEED. */
+static inline size_t ht_default_hash_function(const void *key, size_t len)
+{
+    const int byte_count = (int)len;
+
+    return (size_t)PMurHash32((HT_HASH_SEED), key, byte_count);
+}
+
+/*
+ * Hashes the pointer value itself (not the memory it points to); len is
+ * ignored. Mixer taken from http://stackoverflow.com/a/12996028
+ */
+static inline size_t ht_ptr_hash_function(const void *key, size_t len)
+{
+    size_t h = (size_t)key ^ (HT_HASH_SEED);
+
+    (void)len;
+
+    h ^= h >> 16;
+    h *= 0x45d9f3bUL;
+    h ^= h >> 16;
+    h *= 0x45d9f3bUL;
+    h ^= h >> 16;
+    return h;
+}
+
+#endif /* HT_HASH_32 */
+
+
+/*
+ * Identity hash: returns the 32-bit value that key points to; len is
+ * ignored. Assumes the key is 32-bit aligned and already random enough
+ * to serve as its own hash.
+ */
+static inline size_t ht_uint32_identity_hash_function(const void *key, size_t len)
+{
+    const uint32_t *value = key;
+
+    (void)len;
+    return (size_t)*value;
+}
+
+/*
+ * Identity hash: returns the 64-bit value that key points to, converted
+ * to size_t; len is ignored. Assumes the key is 64-bit aligned and
+ * already random enough to serve as its own hash.
+ */
+static inline size_t ht_uint64_identity_hash_function(const void *key, size_t len)
+{
+    const uint64_t *value = key;
+
+    (void)len;
+    return (size_t)*value;
+}
+
+/*
+ * Hashes the 32-bit value that key points to; len is ignored.
+ * Assumes the key is 32-bit aligned.
+ */
+static inline size_t ht_uint32_hash_function(const void *key, size_t len)
+{
+    uint32_t h = *(const uint32_t *)key;
+
+    (void)len;
+
+    h += (uint32_t)(HT_HASH_SEED);
+    /* http://stackoverflow.com/a/12996028 */
+    h ^= h >> 16;
+    h *= UINT32_C(0x45d9f3b);
+    h ^= h >> 16;
+    h *= UINT32_C(0x45d9f3b);
+    h ^= h >> 16;
+    return h;
+}
+
+/*
+ * Hashes the 64-bit value that key points to; len is ignored.
+ * Assumes the key is 64-bit aligned.
+ */
+static inline size_t ht_uint64_hash_function(const void *key, size_t len)
+{
+    uint64_t h = *(const uint64_t *)key;
+
+    (void)len;
+
+    h += UINT64_C(0x9e3779b97f4a7c15);
+    h += (uint64_t)(HT_HASH_SEED);
+    h ^= h >> 30;
+    h *= UINT64_C(0xbf58476d1ce4e5b9);
+    h ^= h >> 27;
+    h *= UINT64_C(0x94d049bb133111eb);
+    h ^= h >> 31;
+    return (size_t)h;
+}
+
+/*
+ * Suited for set operations of low-valued integers where the stored
+ * hash pointer is the key and the value.
+ *
+ * This function is especially useful for small hash tables (<1000)
+ * where collisions are cheap due to caching but also works for integer
+ * sets up to at least 1,000,000.
+ *
+ * NOTE: The multiplicative hash function by Knuth requires the modulo
+ * to table size be done by shifting the upper bits down, since this is
+ * where the quality bits reside. This yields significantly fewer
+ * collisions which is important for e.g. chained hashing. However, our
+ * interface does not provide the required information.
+ *
+ * When used in open hashing with load factors below 0.7 where the
+ * stored pointer is also the key, collision checking is very cheap and
+ * this pays off in a large range of table sizes where a more
+ * complicated hash simply doesn't pay off.
+ *
+ * When used with a pointer set where the pointer is also the key, it is
+ * not likely to work as well because the pointer acts as a large
+ * integer which works against the design of the hash function. Here a
+ * better mix function is probably worthwhile - therefore we also have
+ * ht_ptr_hash_function.
+ */
+static inline size_t ht_int_hash_function(const void *key, size_t len)
+{
+    /* The pointer value is the integer key; it is seeded, then multiplied. */
+    const size_t seeded = (size_t)key ^ (HT_HASH_SEED);
+
+    (void)len;
+    return seeded * 2654435761UL;
+}
+
+/*
+ * Bernstein's hash function (xor variant) over a zero terminated string.
+ * The len argument is ignored.
+ */
+static inline size_t ht_str_hash_function(const void *key, size_t len)
+{
+    const unsigned char *cursor = key;
+    size_t acc = 5381 ^ (HT_HASH_SEED);
+    size_t ch;
+
+    (void)len;
+
+    for (ch = (size_t)*cursor; ch != 0; ch = (size_t)*++cursor) {
+        /* (acc * 33) xor ch */
+        acc = (acc * 33) ^ ch;
+    }
+    return acc;
+}
+
+/*
+ * Bernstein's hash function (xor variant) over at most len characters,
+ * stopping early at a zero terminator.
+ *
+ * The length is tested before it is decremented, so exactly len
+ * characters can be hashed and a len of 0 hashes nothing. A
+ * pre-decrement would skip the last character and, for len == 0, wrap
+ * around and read until the terminator.
+ */
+static inline size_t ht_strn_hash_function(const void *key, size_t len)
+{
+    const unsigned char *str = key;
+    size_t hash = 5381 ^ (HT_HASH_SEED);
+    size_t c;
+
+    while (len-- && (c = (size_t)*str++))
+        hash = ((hash << 5) + hash) ^ c; /* (hash * 33) xor c */
+
+    return hash;
+}
+
+/*
+ * 32-bit FNV-1a over exactly len bytes of key. When FNV1A_NOMUL is
+ * defined the multiplication by the FNV prime is done with shifts and
+ * adds instead.
+ */
+static inline uint32_t ht_fnv1a32_hash_function(const void *key, size_t len)
+{
+    const uint8_t *cursor = key;
+    uint32_t acc = UINT32_C(0x811c9dc5);
+
+    for (; len != 0; --len, ++cursor) {
+        acc ^= (uint32_t)*cursor;
+#ifndef FNV1A_NOMUL
+        acc *= UINT32_C(0x1000193);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 7) +
+                (acc << 8) + (acc << 24);
+#endif
+    }
+    return acc;
+}
+
+/*
+ * 64-bit FNV-1a over exactly len bytes of key. When FNV1A_NOMUL is
+ * defined the multiplication by the FNV prime is done with shifts and
+ * adds instead.
+ */
+static inline uint64_t ht_fnv1a64_hash_function(const void *key, size_t len)
+{
+    const uint8_t *cursor = key;
+    uint64_t acc = UINT64_C(0xcbf29ce484222325);
+
+    for (; len != 0; --len, ++cursor) {
+        acc ^= (uint64_t)*cursor;
+#ifndef FNV1A_NOMUL
+        acc *= UINT64_C(0x100000001b3);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 5) +
+                (acc << 7) + (acc << 8) + (acc << 40);
+#endif
+    }
+    return acc;
+}
+
+/*
+ * 32-bit FNV-1a over a zero terminated string; the len argument is
+ * ignored. When FNV1A_NOMUL is defined the multiplication by the FNV
+ * prime is done with shifts and adds instead.
+ */
+static inline uint32_t ht_fnv1a32_str_hash_function(const void *key, size_t len)
+{
+    const uint8_t *cursor = key;
+    uint32_t acc = UINT32_C(0x811c9dc5);
+
+    (void)len;
+
+    for (; *cursor != 0; ++cursor) {
+        acc ^= (uint32_t)*cursor;
+#ifndef FNV1A_NOMUL
+        acc *= UINT32_C(0x1000193);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 7) +
+                (acc << 8) + (acc << 24);
+#endif
+    }
+    return acc;
+}
+
+/*
+ * 64-bit FNV-1a over a zero terminated string; the len argument is
+ * ignored. When FNV1A_NOMUL is defined the multiplication by the FNV
+ * prime is done with shifts and adds instead.
+ */
+static inline uint64_t ht_fnv1a64_str_hash_function(const void *key, size_t len)
+{
+    const uint8_t *cursor = key;
+    uint64_t acc = UINT64_C(0xcbf29ce484222325);
+
+    (void)len;
+
+    for (; *cursor != 0; ++cursor) {
+        acc ^= (uint64_t)*cursor;
+#ifndef FNV1A_NOMUL
+        acc *= UINT64_C(0x100000001b3);
+#else
+        acc += (acc << 1) + (acc << 4) + (acc << 5) +
+                (acc << 7) + (acc << 8) + (acc << 40);
+#endif
+    }
+    return acc;
+}
+
+
+#endif /* HT_HASH_FUNCTION_H */
diff --git a/external/hash/ht_portable.h b/external/hash/ht_portable.h
new file mode 100644
index 0000000..3affc1d
--- /dev/null
+++ b/external/hash/ht_portable.h
@@ -0,0 +1,9 @@
+#ifndef HT_PORTABLE_H
+#define HT_PORTABLE_H
+
+#if defined(_MSC_VER) && !defined(inline)
+#define inline __inline
+#endif
+#include "pstdint.h"
+
+#endif
diff --git a/external/hash/ht_trace.h b/external/hash/ht_trace.h
new file mode 100644
index 0000000..63af4a8
--- /dev/null
+++ b/external/hash/ht_trace.h
@@ -0,0 +1,59 @@
+#ifndef HT_TRACE_H
+#define HT_TRACE_H
+
+#ifdef HT_TRACE_ON
+#ifndef HT_TRACE_OUT
+#define HT_TRACE_OUT stderr
+#endif
+
+#include <stdio.h>
+/*
+ * Trace helpers writing one line each to HT_TRACE_OUT. Arguments are
+ * parenthesized so that a cast applies to the whole argument expression,
+ * and the hex variant passes an unsigned long to match %lx.
+ */
+#define ht_trace(s) fprintf(HT_TRACE_OUT, "trace: %s\n", (s))
+#define ht_tracei(s, i) fprintf(HT_TRACE_OUT, "trace: %s: %d\n", (s), (int)(i))
+#define ht_tracex(s, x) fprintf(HT_TRACE_OUT, "trace: %s: 0x%lx\n", (s), (unsigned long)(x))
+#define ht_traces(s, s2, len) fprintf(HT_TRACE_OUT, "trace: %s: %.*s\n", (s), (int)(len), (s2))
+
+/*
+ * Dumps up to `count` buckets (32 when `count` is 0, and never more than
+ * the bucket count) starting at bucket `first` and wrapping around the
+ * table, preceded by a summary line. When the table has an offsets
+ * array, each bucket's offset is printed as well.
+ *
+ * Sizes are converted to unsigned long and stored pointers go through
+ * size_t before being narrowed, so every format specifier matches its
+ * argument; only the low bits of each stored pointer are shown.
+ */
+static void ht_trace_buckets(hash_table_t *ht, const char *msg, int first, int count)
+{
+    int i, j, N, n;
+
+    n = (int)ht->buckets;
+    N = n - 1;
+
+    if (count == 0) {
+        count = 32;
+    }
+    if (count > n) {
+        count = n;
+    }
+
+    first = first & N;
+    fprintf(HT_TRACE_OUT, "bucket trace: %s\n", msg);
+    fprintf(HT_TRACE_OUT, "item count: %lu, bucket count %lu, utilization: %0.1f%%\n",
+            (unsigned long)ht->count, (unsigned long)ht->buckets,
+            (double)ht->count / (double)ht->buckets * 100);
+
+    if (ht->offsets) {
+        for (i = 0; i < count; ++i) {
+            j = (first + i) & N;
+            fprintf(HT_TRACE_OUT, "%03d:%08x:[%02d]\n",
+                    j, (unsigned int)(size_t)((void **)ht->table)[j], (int)ht->offsets[j]);
+        }
+    } else {
+        for (i = 0; i < count; ++i) {
+            j = (first + i) & N;
+            fprintf(HT_TRACE_OUT, "%03d:%08x\n", j, (unsigned int)(size_t)((void **)ht->table)[j]);
+        }
+    }
+    fprintf(HT_TRACE_OUT, "--\n");
+}
+#else
+#define ht_trace(arg1) ((void)0)
+#define ht_tracei(arg1, arg2) ((void)0)
+#define ht_tracex(arg1, arg2) ((void)0)
+#define ht_traces(arg1, arg2, arg3) ((void)0)
+#define ht_trace_buckets(arg1, arg2, arg3, arg4) ((void)0)
+#endif
+
+#endif /* HT_TRACE_H */
diff --git a/external/hash/initbuild.sh b/external/hash/initbuild.sh
new file mode 100755
index 0000000..34a3fc0
--- /dev/null
+++ b/external/hash/initbuild.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+# Configure and build the Release variant with CMake + Ninja in
+# build/release, relative to the directory this script lives in.
+# The path is quoted so directories containing spaces work, and the
+# script stops if the directory change fails instead of building elsewhere.
+cd "$(dirname "$0")" || exit 1
+mkdir -p "build/release"
+cd build/release && cmake -GNinja ../.. -DCMAKE_BUILD_TYPE=Release && ninja
diff --git a/external/hash/initbuild_debug.sh b/external/hash/initbuild_debug.sh
new file mode 100755
index 0000000..d190139
--- /dev/null
+++ b/external/hash/initbuild_debug.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+# Configure and build the Debug variant with CMake + Ninja in
+# build/debug, relative to the directory this script lives in.
+# The path is quoted so directories containing spaces work, and the
+# script stops if the directory change fails instead of building elsewhere.
+cd "$(dirname "$0")" || exit 1
+mkdir -p "build/debug"
+cd build/debug && cmake -GNinja ../.. -DCMAKE_BUILD_TYPE=Debug && ninja
diff --git a/external/hash/int_set.h b/external/hash/int_set.h
new file mode 100644
index 0000000..b873ef9
--- /dev/null
+++ b/external/hash/int_set.h
@@ -0,0 +1,50 @@
+#ifndef INT_SET_H
+#define INT_SET_H
+
+#include "ptr_set.h"
+
+/*
+ * The values 0, 1, and 2 are reserved so we map integers
+ * before casting them to void *.
+ *
+ * Instead we disallow the largest positive integers.
+ *
+ * This is specific to the implementation of ptr_set, so
+ * if it changes, we may have to change here as well.
+ */
+
+/*
+ * Integer <-> pointer mapping.
+ *
+ * HT_INT_SET_OFFSET is INT_MAX - 1, so the reserved pointer values
+ * 0, 1 and 2 correspond to the integers INT_MAX - 1, INT_MAX and INT_MIN;
+ * those three integers cannot be stored.
+ *
+ * The shift and the offset arithmetic are done in unsigned int: shifting
+ * a signed 1 into the sign bit and overflowing a signed subtraction are
+ * both undefined behaviour. The result is converted back through int so
+ * the pointer values are the same as the signed arithmetic produced
+ * wherever that arithmetic was defined.
+ */
+#define HT_INT_SET_OFFSET ((int)((1u << (8 * sizeof(int) - 1)) - 2u))
+#define HT_INT_TO_PTR(x) ((void *)(size_t)(int)((unsigned)(x) - (unsigned)HT_INT_SET_OFFSET))
+#define HT_PTR_TO_INT(x) ((int)((unsigned)(size_t)(x) + (unsigned)HT_INT_SET_OFFSET))
+
+/*
+ * Return value helpers: map the integer x to its pointer form and apply
+ * the corresponding ptr_set helper to it.
+ *
+ * These use the PTR_SET_IS_* names that ptr_set.h defines; no
+ * HT_PTR_SET_* helpers are declared there.
+ * NOTE(review): PTR_SET_IS_VALID is presumed to be defined next to
+ * PTR_SET_IS_MISSING and PTR_SET_IS_ERROR in ptr_set.h - confirm.
+ */
+#define INT_SET_IS_MISSING(x) (PTR_SET_IS_MISSING(HT_INT_TO_PTR(x)))
+#define INT_SET_IS_ERROR(x) (PTR_SET_IS_ERROR(HT_INT_TO_PTR(x)))
+#define INT_SET_IS_VALID(x) (PTR_SET_IS_VALID(HT_INT_TO_PTR(x)))
+
+typedef ptr_set_t int_set_t;
+
+/*
+ * Adds x to the set, keeping any existing entry (ht_keep).
+ * Returns 1 when ptr_set_insert_item reports a non-null result, which
+ * the original contract describes as "already present"; 0 otherwise.
+ */
+static inline int int_set_add(int_set_t *S, int x)
+{
+    void *item = HT_INT_TO_PTR(x);
+
+    return ptr_set_insert_item(S, item, ht_keep) != 0;
+}
+
+/*
+ * Removes x from the set.
+ * Returns 1 when ptr_set_remove_item returns a non-null result
+ * (an entry was removed), 0 otherwise.
+ */
+static inline int int_set_remove(int_set_t *S, int x)
+{
+    void *item = HT_INT_TO_PTR(x);
+
+    return ptr_set_remove_item(S, item) != 0;
+}
+
+/* Returns the number of stored integers, as reported by ptr_set_count. */
+static inline int int_set_count(int_set_t *S)
+{
+    return (int)ptr_set_count(S);
+}
+
+/*
+ * Membership test for x, delegated to ptr_set_exists on the mapped
+ * pointer. Documented to return 1 if present, 0 otherwise.
+ */
+static inline int int_set_exists(int_set_t *S, int x)
+{
+    void *item = HT_INT_TO_PTR(x);
+
+    return ptr_set_exists(S, item);
+}
+
+#endif /* INT_SET_H */
diff --git a/external/hash/load_test.c b/external/hash/load_test.c
new file mode 100644
index 0000000..1c3d0e7
--- /dev/null
+++ b/external/hash/load_test.c
@@ -0,0 +1,86 @@
+#include <assert.h>
+#include <sys/time.h>
+#include <stdio.h>
+
+//#define INT_SET_PRIVATE
+#ifdef INT_SET_PRIVATE
+/* Make all hash functions private to this module for better
+ * performance. This may not be necessary depending on compiler
+ * optimizations. clang 4.2 -O3 benefits while -O4 figures it and get
+ * same speed with external linkage. */
+#define HT_PRIVATE
+#include "int_set.h"
+#include "ptr_set.c"
+#undef HT_PRIVATE
+#else
+/* Use external linkage. Link with ptr_set.c which int_set depends upon. */
+#include "int_set.h"
+#endif
+
+/*
+ * Returns end - start as a timeval. When the microsecond difference is
+ * negative, one second is borrowed so tv_usec ends up non-negative.
+ */
+struct timeval time_diff(struct timeval start, struct timeval end)
+{
+    struct timeval delta;
+
+    delta.tv_sec = end.tv_sec - start.tv_sec;
+    delta.tv_usec = end.tv_usec - start.tv_usec;
+    if (delta.tv_usec < 0) {
+        delta.tv_sec -= 1;
+        delta.tv_usec += 1000000;
+    }
+    return delta;
+}
+
+/* Converts a timeval interval to milliseconds, as a double. */
+double elapsed_ms(struct timeval td)
+{
+    const double from_seconds = (double)td.tv_sec * 1000;
+    const double from_micros = (double)td.tv_usec / 1000;
+
+    return from_seconds + from_micros;
+}
+
+/*
+ * Load test for int_set:
+ *   1. insert the keys 1..N, checking each is visible after insertion;
+ *   2. verify all keys are present;
+ *   3. time one pass of N lookups and print the throughput;
+ *   4. remove the keys one by one, checking the count as it shrinks.
+ *
+ * The checks are plain asserts, so they disappear when NDEBUG is defined.
+ */
+void test_int_set()
+{
+    /* Number of keys; reduce (e.g. to 1000) for a quick run. */
+    const int N = 1000000;
+    int_set_t set = {0};
+    int_set_t *S = &set;
+    struct timeval begin, finish, delta;
+    double ms, nsop, opms;
+    int i, found;
+
+    for (i = 1; i <= N; ++i) {
+        int_set_add(S, i);
+        assert(int_set_exists(S, i));
+    }
+    assert(int_set_count(S) == N);
+
+    for (i = 1; i <= N; ++i) {
+        assert(int_set_exists(S, i));
+    }
+
+    /* Timed section: only the lookup loop sits between the two clock reads. */
+    gettimeofday(&begin, 0);
+    found = 0;
+    for (i = 1; i <= N; ++i) {
+        found += int_set_exists(S, i);
+    }
+    gettimeofday(&finish, 0);
+
+    delta = time_diff(begin, finish);
+    ms = elapsed_ms(delta);
+
+    nsop = ms * 1000000 / found;
+    opms = (double)found / ms;
+    printf("%d out of %d keys found in time %0.03f ms or %0.01f ns per op\n",
+            found, N, ms, nsop);
+    printf("ops / ms: %0.0f\n", opms);
+
+    /* Drain the set, checking count and membership around each removal. */
+    for (i = 1; i <= N; ++i) {
+        assert(int_set_count(S) == N - i + 1);
+        assert(int_set_exists(S, i));
+        int_set_remove(S, i);
+        assert(!int_set_exists(S, i));
+    }
+    assert(int_set_count(S) == 0);
+}
+
+/* Entry point: runs the int_set load test and exits with status 0. */
+int main(int argc, char *argv[])
+{
+    /* Command-line arguments are not used. */
+    (void)argc;
+    (void)argv;
+
+    test_int_set();
+    return 0;
+}
diff --git a/external/hash/pstdint.h b/external/hash/pstdint.h
new file mode 100644
index 0000000..14444aa
--- /dev/null
+++ b/external/hash/pstdint.h
@@ -0,0 +1,898 @@
+/* A portable stdint.h
+ ****************************************************************************
+ * BSD License:
+ ****************************************************************************
+ *
+ * Copyright (c) 2005-2016 Paul Hsieh
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ * Version 0.1.15.2
+ *
+ * The ANSI C standard committee, for the C99 standard, specified the
+ * inclusion of a new standard include file called stdint.h. This is
+ * a very useful and long desired include file which contains several
+ * very precise definitions for integer scalar types that is
+ * critically important for making portable several classes of
+ * applications including cryptography, hashing, variable length
+ * integer libraries and so on. But for most developers it's likely
+ * useful just for programming sanity.
+ *
+ * The problem is that some compiler vendors chose to ignore the C99
+ * standard and some older compilers have no opportunity to be updated.
+ * Because of this situation, simply including stdint.h in your code
+ * makes it unportable.
+ *
+ * So that's what this file is all about. Its an attempt to build a
+ * single universal include file that works on as many platforms as
+ * possible to deliver what stdint.h is supposed to. Even compilers
+ * that already come with stdint.h can use this file instead without
+ * any loss of functionality. A few things that should be noted about
+ * this file:
+ *
+ * 1) It is not guaranteed to be portable and/or present an identical
+ * interface on all platforms. The extreme variability of the
+ * ANSI C standard makes this an impossibility right from the
+ * very get go. Its really only meant to be useful for the vast
+ * majority of platforms that possess the capability of
+ * implementing usefully and precisely defined, standard sized
+ * integer scalars. Systems which are not intrinsically 2s
+ * complement may produce invalid constants.
+ *
+ * 2) There is an unavoidable use of non-reserved symbols.
+ *
+ * 3) Other standard include files are invoked.
+ *
+ * 4) This file may come in conflict with future platforms that do
+ * include stdint.h. The hope is that one or the other can be
+ * used with no real difference.
+ *
+ * 5) In the current version, if your platform can't represent
+ * int32_t, int16_t and int8_t, it just dumps out with a compiler
+ * error.
+ *
+ * 6) 64 bit integers may or may not be defined. Test for their
+ * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ * Note that this is different from the C99 specification which
+ * requires the existence of 64 bit support in the compiler. If
+ * this is not defined for your platform, yet it is capable of
+ * dealing with 64 bits then it is because this file has not yet
+ * been extended to cover all of your system's capabilities.
+ *
+ * 7) (u)intptr_t may or may not be defined. Test for its presence
+ * with the test: #ifdef PTRDIFF_MAX. If this is not defined
+ * for your platform, then it is because this file has not yet
+ * been extended to cover all of your system's capabilities, not
+ * because its optional.
+ *
+ * 8) The following might not be defined even if your platform is
+ * capable of defining it:
+ *
+ * WCHAR_MIN
+ * WCHAR_MAX
+ * (u)int64_t
+ * PTRDIFF_MIN
+ * PTRDIFF_MAX
+ * (u)intptr_t
+ *
+ * 9) The following have not been defined:
+ *
+ * WINT_MIN
+ * WINT_MAX
+ *
+ * 10) The criteria for defining (u)int_least(*)_t isn't clear,
+ * except for systems which don't have a type that precisely
+ * defined 8, 16, or 32 bit types (which this include file does
+ * not support anyways). Default definitions have been given.
+ *
+ * 11) The criteria for defining (u)int_fast(*)_t isn't something I
+ * would trust to any particular compiler vendor or the ANSI C
+ * committee. It is well known that "compatible systems" are
+ * commonly created that have very different performance
+ * characteristics from the systems they are compatible with,
+ * especially those whose vendors make both the compiler and the
+ * system. Default definitions have been given, but its strongly
+ * recommended that users never use these definitions for any
+ * reason (they do *NOT* deliver any serious guarantee of
+ * improved performance -- not in this file, nor any vendor's
+ * stdint.h).
+ *
+ * 12) The following macros:
+ *
+ * PRINTF_INTMAX_MODIFIER
+ * PRINTF_INT64_MODIFIER
+ * PRINTF_INT32_MODIFIER
+ * PRINTF_INT16_MODIFIER
+ * PRINTF_LEAST64_MODIFIER
+ * PRINTF_LEAST32_MODIFIER
+ * PRINTF_LEAST16_MODIFIER
+ * PRINTF_INTPTR_MODIFIER
+ *
+ * are strings which have been defined as the modifiers required
+ * for the "d", "u" and "x" printf formats to correctly output
+ * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ * (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ * PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ * provide their own stdint.h. PRINTF_INT64_MODIFIER is not
+ * defined if INT64_MAX is not defined. These are an extension
+ * beyond what C99 specifies must be in stdint.h.
+ *
+ * In addition, the following macros are defined:
+ *
+ * PRINTF_INTMAX_HEX_WIDTH
+ * PRINTF_INT64_HEX_WIDTH
+ * PRINTF_INT32_HEX_WIDTH
+ * PRINTF_INT16_HEX_WIDTH
+ * PRINTF_INT8_HEX_WIDTH
+ * PRINTF_INTMAX_DEC_WIDTH
+ * PRINTF_INT64_DEC_WIDTH
+ * PRINTF_INT32_DEC_WIDTH
+ * PRINTF_INT16_DEC_WIDTH
+ * PRINTF_INT8_DEC_WIDTH
+ * PRINTF_UINTMAX_DEC_WIDTH
+ * PRINTF_UINT64_DEC_WIDTH
+ * PRINTF_UINT32_DEC_WIDTH
+ * PRINTF_UINT16_DEC_WIDTH
+ * PRINTF_UINT8_DEC_WIDTH
+ *
+ * Which specifies the maximum number of characters required to
+ * print the number of that type in either hexadecimal or decimal.
+ * These are an extension beyond what C99 specifies must be in
+ * stdint.h.
+ *
+ * Compilers tested (all with 0 warnings at their highest respective
+ * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ * This file should be considered a work in progress. Suggestions for
+ * improvements, especially those which increase coverage are strongly
+ * encouraged.
+ *
+ * Acknowledgements
+ *
+ * The following people have made significant contributions to the
+ * development and testing of this file:
+ *
+ * Chris Howie
+ * John Steele Scott
+ * Dave Thorup
+ * John Dill
+ * Florian Wobbe
+ * Christopher Sean Morrison
+ * Mikkel Fahnoe Jorgensen
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(_MSC_VER) && _MSC_VER >= 1600) || (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__)) && !(defined(__APPLE__) && defined(__MACH__))
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "l"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER ""
+# endif
+# else
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+# if (UINT_MAX == UINT32_MAX)
+# define PRINTF_INT32_MODIFIER ""
+# else
+# define PRINTF_INT32_MODIFIER "l"
+# endif
+# endif
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+# define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_UINT64_HEX_WIDTH
+# define PRINTF_UINT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_UINT32_HEX_WIDTH
+# define PRINTF_UINT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_UINT16_HEX_WIDTH
+# define PRINTF_UINT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_UINT8_HEX_WIDTH
+# define PRINTF_UINT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "19"
+# endif
+# ifndef PRINTF_UINT64_DEC_WIDTH
+# define PRINTF_UINT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_UINT32_DEC_WIDTH
+# define PRINTF_UINT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_UINT16_DEC_WIDTH
+# define PRINTF_UINT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_UINT8_DEC_WIDTH
+# define PRINTF_UINT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+# define PRINTF_INTMAX_HEX_WIDTH PRINTF_UINT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_UINTMAX_HEX_WIDTH
+# define PRINTF_UINTMAX_HEX_WIDTH PRINTF_UINT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+# define PRINTF_INTMAX_DEC_WIDTH PRINTF_UINT64_DEC_WIDTH
+# endif
+# ifndef PRINTF_UINTMAX_DEC_WIDTH
+# define PRINTF_UINTMAX_DEC_WIDTH PRINTF_UINT64_DEC_WIDTH
+# endif
+
+/*
+ * Something really weird is going on with Open Watcom. Just pull some of
+ * these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+# if !defined (INT64_C)
+# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
+# endif
+# if !defined (UINT64_C)
+# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
+# endif
+# if !defined (INT32_C)
+# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
+# endif
+# if !defined (UINT32_C)
+# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
+# endif
+# if !defined (INT16_C)
+# define INT16_C(x) (x)
+# endif
+# if !defined (UINT16_C)
+# define UINT16_C(x) (x)
+# endif
+# if !defined (INT8_C)
+# define INT8_C(x) (x)
+# endif
+# if !defined (UINT8_C)
+# define UINT8_C(x) (x)
+# endif
+# if !defined (UINT64_MAX)
+# define UINT64_MAX 18446744073709551615ULL
+# endif
+# if !defined (INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+# endif
+# if !defined (UINT32_MAX)
+# define UINT32_MAX 4294967295UL
+# endif
+# if !defined (INT32_MAX)
+# define INT32_MAX 2147483647L
+# endif
+# if !defined (INTMAX_MAX)
+# define INTMAX_MAX INT64_MAX
+# endif
+# if !defined (INTMAX_MIN)
+# define INTMAX_MIN INT64_MIN
+# endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ * Deduce the type assignments from limits.h under the assumption that
+ * integer sizes in bits are powers of 2, and follow the ANSI
+ * definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#if !defined(uint8_t) && !defined(_UINT8_T)
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
+ typedef unsigned char uint8_t;
+# define UINT8_C(v) ((uint8_t) v)
+# else
+# error "Platform not supported"
+# endif
+#endif
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+/*
+ * Defined arithmetically from INT8_MAX rather than by casting 0x80:
+ * the result is a plain int constant expression that is also usable in
+ * #if, and it does not depend on INT8_C being defined.
+ */
+#ifndef INT8_MIN
+# define INT8_MIN (-INT8_MAX - 1)
+#endif
+#if !defined(int8_t) && !defined(_INT8_T)
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
+ typedef signed char int8_t;
+# define INT8_C(v) ((int8_t) v)
+# else
+# error "Platform not supported"
+# endif
+#endif
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#if !defined(uint16_t) && !defined(_UINT16_T)
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
+ typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+# define PRINTF_INT16_MODIFIER ""
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)
+ typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+# define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+/*
+ * Defined arithmetically from INT16_MAX rather than by casting 0x8000:
+ * the result is a constant expression that is also usable in #if, and
+ * it does not depend on INT16_C being defined.
+ */
+#ifndef INT16_MIN
+# define INT16_MIN (-INT16_MAX - 1)
+#endif
+#if !defined(int16_t) && !defined(_INT16_T)
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
+ typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+# define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)
+ typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+# define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#if !defined(uint32_t) && !defined(_UINT32_T)
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
+ typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)
+ typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)
+ typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+/*
+ * Defined arithmetically from INT32_MAX. A hexadecimal constant
+ * 0x80000000 (with or without an L suffix) does not fit a 32-bit signed
+ * type, so it takes an unsigned type and would yield a large positive
+ * value instead of the intended minimum.
+ */
+#ifndef INT32_MIN
+# define INT32_MIN (-INT32_MAX - 1)
+#endif
+#if !defined(int32_t) && !defined(_INT32_T)
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
+ typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)
+ typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)
+ typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+# define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+/*
+ * The macro stdint_int64_defined is temporarily used to record
+ * whether or not 64 integer support is available. It must be
+ * defined for any 64 integer extensions for new platforms that are
+ * added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
+# define stdint_int64_defined
+ typedef long long int64_t;
+ typedef unsigned long long uint64_t;
+# define UINT64_C(v) v ## ULL
+# define INT64_C(v) v ## LL
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "ll"
+# endif
+# endif
+#endif
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__)
+# define stdint_int64_defined
+ __extension__ typedef long long int64_t;
+ __extension__ typedef unsigned long long uint64_t;
+# define UINT64_C(v) v ## ULL
+# define INT64_C(v) v ## LL
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "ll"
+# endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+# define stdint_int64_defined
+ typedef long long int64_t;
+ typedef unsigned long long uint64_t;
+# define UINT64_C(v) v ## ULL
+# define INT64_C(v) v ## LL
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "ll"
+# endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+# define stdint_int64_defined
+ typedef __int64 int64_t;
+ typedef unsigned __int64 uint64_t;
+# define UINT64_C(v) v ## UI64
+# define INT64_C(v) v ## I64
+# ifndef PRINTF_INT64_MODIFIER
+# define PRINTF_INT64_MODIFIER "I64"
+# endif
+# endif
+#endif
+
+/*
+ * Limits for long long, provided only when the 64-bit constant macros
+ * exist. ULONG_LONG_MAX is guarded on UINT64_C in the same way that
+ * LONG_LONG_MAX is guarded on INT64_C, so it is never defined in terms
+ * of a macro that does not exist.
+ */
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+/*
+ * Defined arithmetically from INT64_MAX: the literal 9223372036854775808
+ * does not fit a signed 64-bit type, so it cannot be written directly
+ * and then negated.
+ */
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN (-INT64_MAX - 1)
+#endif
+#if !defined (UINT64_MAX) && defined (INT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ * Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "19"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+#ifndef PRINTF_UINT64_DEC_WIDTH
+# define PRINTF_UINT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_UINT32_DEC_WIDTH
+# define PRINTF_UINT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_UINT16_DEC_WIDTH
+# define PRINTF_UINT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_UINT8_DEC_WIDTH
+# define PRINTF_UINT8_DEC_WIDTH "3"
+#endif
+
+/*
+ * Ok, lets not worry about 128 bit integers for now. Moore's law says
+ * we don't need to worry about that until about 2040 at which point
+ * we'll have bigger things to worry about.
+ */
+
+/*
+ * (u)intmax_t is the widest integer type this file knows about: the
+ * 64-bit types when available, otherwise the 32-bit types. Both branches
+ * define the same set of limit, constant and printf helper macros.
+ */
+#ifdef stdint_int64_defined
+ typedef int64_t intmax_t;
+ typedef uint64_t uintmax_t;
+# define INTMAX_MAX INT64_MAX
+# define INTMAX_MIN INT64_MIN
+# define UINTMAX_MAX UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define INTMAX_C(v) INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else
+ typedef int32_t intmax_t;
+ typedef uint32_t uintmax_t;
+# define INTMAX_MAX INT32_MAX
+# define INTMAX_MIN INT32_MIN
+# define UINTMAX_MAX UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define INTMAX_C(v) INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif
+
+/*
+ * Because this file currently only supports platforms which have
+ * precise powers of 2 as bit sizes for the default integers, the
+ * least definitions are all trivial. Its possible that a future
+ * version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+ typedef int8_t int_least8_t;
+ typedef uint8_t uint_least8_t;
+ typedef int16_t int_least16_t;
+ typedef uint16_t uint_least16_t;
+ typedef int32_t int_least32_t;
+ typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define UINT_LEAST8_MAX UINT8_MAX
+# define INT_LEAST8_MAX INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define INT_LEAST16_MAX INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define INT_LEAST32_MAX INT32_MAX
+# define INT_LEAST8_MIN INT8_MIN
+# define INT_LEAST16_MIN INT16_MIN
+# define INT_LEAST32_MIN INT32_MIN
+# ifdef stdint_int64_defined
+ typedef int64_t int_least64_t;
+ typedef uint64_t uint_least64_t;
+# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+# define UINT_LEAST64_MAX UINT64_MAX
+# define INT_LEAST64_MAX INT64_MAX
+# define INT_LEAST64_MIN INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined
+
+/*
+ * The ANSI C committee pretending to know or specify anything about
+ * performance is the epitome of misguided arrogance. The mandate of
+ * this file is to *ONLY* ever support that absolute minimum
+ * definition of the fast integer types, for compatibility purposes.
+ * No extensions, and no attempt to suggest what may or may not be a
+ * faster integer type will ever be made in this file. Developers are
+ * warned to stay away from these types when using this or any other
+ * stdint.h.
+ */
+
+typedef int_least8_t int_fast8_t;
+typedef uint_least8_t uint_fast8_t;
+typedef int_least16_t int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef int_least32_t int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define UINT_FAST8_MAX UINT_LEAST8_MAX
+#define INT_FAST8_MAX INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define INT_FAST16_MAX INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define INT_FAST32_MAX INT_LEAST32_MAX
+#define INT_FAST8_MIN INT_LEAST8_MIN
+#define INT_FAST16_MIN INT_LEAST16_MIN
+#define INT_FAST32_MIN INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+ typedef int_least64_t int_fast64_t;
+ typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define INT_FAST64_MAX INT_LEAST64_MAX
+# define INT_FAST64_MIN INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined
+
+/*
+ * Whatever piecemeal, per compiler thing we can do about the wchar_t
+ * type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN
+# define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX
+# define WCHAR_MAX ((wchar_t)-1)
+# endif
+#endif
+
+/*
+ * Whatever piecemeal, per compiler/platform thing we can do about the
+ * (u)intptr_t types and limits.
+ */
+
+#if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T)
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+/*
+ * Pick the pointer width from well-known platform macros. The 64-bit
+ * targets are tested first, so a 32-bit macro that is also defined on a
+ * 64-bit target cannot select the wrong width.
+ * NOTE(review): the 32-bit branch previously repeated __ppc64__, which
+ * can never match there because the first branch already handles it;
+ * __ppc__ (32-bit PowerPC) looks like what was intended - confirm.
+ */
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__)
+# define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+# define stdint_intptr_bits 16
+# else
+# define stdint_intptr_bits 32
+# endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc__)
+# define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what did Intel do about x86-64? */
+# else
+/* #error "This platform might not be supported yet" */
+# endif
+
+# ifdef stdint_intptr_bits
+# define stdint_intptr_glue3_i(a,b,c) a##b##c
+# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c)
+# ifndef PRINTF_INTPTR_MODIFIER
+# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+# endif
+# ifndef PTRDIFF_MAX
+# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+# endif
+# ifndef PTRDIFF_MIN
+# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+# endif
+# ifndef UINTPTR_MAX
+# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+# endif
+# ifndef INTPTR_MAX
+# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+# endif
+# ifndef INTPTR_MIN
+# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+# endif
+# ifndef INTPTR_C
+# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+# endif
+# ifndef UINTPTR_C
+# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+# endif
+ typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+ typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t;
+# else
+/* TODO -- This following is likely wrong for some platforms, and does
+ nothing for the definition of uintptr_t. */
+ typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+/*
+ * Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ */
+
+/*
+ * Built as (2^(bits-2) - 1) * 2 + 1 so that no intermediate value
+ * exceeds the maximum; shifting 1 into the sign bit of a signed type
+ * is undefined behaviour.
+ */
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX (((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) * 2 + 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/*
+ * Please compile with the maximum warning settings to make sure macros are
+ * not defined more than once.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)
+
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);
+
+#define DECL(us,bits) glue3(DECL,us,) (bits)
+
+#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
+
+#define REPORTERROR(msg) { err_n++; if (err_first <= 0) err_first = __LINE__; printf msg; }
+
+/*
+ * Self-test, compiled only when __TEST_PSTDINT_FOR_CORRECTNESS is defined.
+ *
+ * Formats known values with the PRINTF_*_MODIFIER macros and compares the
+ * text, checks the *_DEC_WIDTH macros against the printed lengths, checks
+ * UINTn_MAX against an all-ones value, and finally prints the sizes of
+ * the basic types. Failures are counted in err_n and reported on stdout;
+ * the exit status is always EXIT_SUCCESS.
+ */
+int main () {
+ int err_n = 0;
+ int err_first = 0;
+ DECL(I,8)
+ DECL(U,8)
+ DECL(I,16)
+ DECL(U,16)
+ DECL(I,32)
+ DECL(U,32)
+#ifdef INT64_MAX
+ DECL(I,64)
+ DECL(U,64)
+#endif
+ intmax_t imax = INTMAX_C(0);
+ uintmax_t umax = UINTMAX_C(0);
+ char str0[256], str1[256];
+
+ sprintf (str0, "%" PRINTF_INT32_MODIFIER "d", INT32_C(2147483647));
+ if (0 != strcmp (str0, "2147483647")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str0));
+ if (atoi(PRINTF_INT32_DEC_WIDTH) != (int) strlen(str0)) REPORTERROR (("Something wrong with PRINTF_INT32_DEC_WIDTH : %s\n", PRINTF_INT32_DEC_WIDTH));
+ sprintf (str0, "%" PRINTF_INT32_MODIFIER "u", UINT32_C(4294967295));
+ if (0 != strcmp (str0, "4294967295")) REPORTERROR (("Something wrong with PRINTF_INT32_MODIFIER : %s\n", str0));
+ if (atoi(PRINTF_UINT32_DEC_WIDTH) != (int) strlen(str0)) REPORTERROR (("Something wrong with PRINTF_UINT32_DEC_WIDTH : %s\n", PRINTF_UINT32_DEC_WIDTH));
+#ifdef INT64_MAX
+ /* The 64-bit checks report the 64-bit modifier macro by name. */
+ sprintf (str1, "%" PRINTF_INT64_MODIFIER "d", INT64_C(9223372036854775807));
+ if (0 != strcmp (str1, "9223372036854775807")) REPORTERROR (("Something wrong with PRINTF_INT64_MODIFIER : %s\n", str1));
+ if (atoi(PRINTF_INT64_DEC_WIDTH) != (int) strlen(str1)) REPORTERROR (("Something wrong with PRINTF_INT64_DEC_WIDTH : %s, %d\n", PRINTF_INT64_DEC_WIDTH, (int) strlen(str1)));
+ sprintf (str1, "%" PRINTF_INT64_MODIFIER "u", UINT64_C(18446744073709550591));
+ if (0 != strcmp (str1, "18446744073709550591")) REPORTERROR (("Something wrong with PRINTF_INT64_MODIFIER : %s\n", str1));
+ if (atoi(PRINTF_UINT64_DEC_WIDTH) != (int) strlen(str1)) REPORTERROR (("Something wrong with PRINTF_UINT64_DEC_WIDTH : %s, %d\n", PRINTF_UINT64_DEC_WIDTH, (int) strlen(str1)));
+#endif
+
+ /* Reference text that every zero-initialized variable must reproduce. */
+ sprintf (str0, "%d %x\n", 0, ~0);
+
+ sprintf (str1, "%d %x\n", i8, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i8 : %s\n", str1));
+ sprintf (str1, "%u %x\n", u8, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u8 : %s\n", str1));
+ sprintf (str1, "%d %x\n", i16, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i16 : %s\n", str1));
+ sprintf (str1, "%u %x\n", u16, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u16 : %s\n", str1));
+ sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i32 : %s\n", str1));
+ sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with u32 : %s\n", str1));
+#ifdef INT64_MAX
+ sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with i64 : %s\n", str1));
+#endif
+ sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with imax : %s\n", str1));
+ sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
+ if (0 != strcmp (str0, str1)) REPORTERROR (("Something wrong with umax : %s\n", str1));
+
+ TESTUMAX(8);
+ TESTUMAX(16);
+ TESTUMAX(32);
+#ifdef INT64_MAX
+ TESTUMAX(64);
+#endif
+
+#define STR(v) #v
+#define Q(v) printf ("sizeof " STR(v) " = %u\n", (unsigned) sizeof (v));
+ if (err_n) {
+ printf ("pstdint.h is not correct. Please use sizes below to correct it:\n");
+ }
+
+ Q(int)
+ Q(unsigned)
+ Q(long int)
+ Q(short int)
+ Q(int8_t)
+ Q(int16_t)
+ Q(int32_t)
+#ifdef INT64_MAX
+ Q(int64_t)
+#endif
+
+ return EXIT_SUCCESS;
+}
+
+#endif
diff --git a/external/hash/ptr_set.c b/external/hash/ptr_set.c
new file mode 100644
index 0000000..ab12ddf
--- /dev/null
+++ b/external/hash/ptr_set.c
@@ -0,0 +1,60 @@
+/*
+ * Creates a set of stored pointers by using the pointer itself as key.
+ *
+ * (void *)0 (HT_MISSING) cannot be stored.
+ * (void *)1 (HT_DELETED) also cannot be stored.
+ *
+ * ht_item, ht_key, ht_key_len, and ht_match are required.
+ *
+ * In this case HT_HASH_FUNCTION is also required because
+ * we do not read the content of the key but use the pointer
+ * itself as a key. The default behavior would crash.
+ *
+ * Only one hash table can be defined in a single compilation unit
+ * because of static function names in the generic implementation.
+ */
+
+#include "ptr_set.h"
+
+static inline size_t ptr_set_hash_function(const void *s, size_t len);
+#define HT_HASH_FUNCTION ptr_set_hash_function
+
+#define HT_LOAD_FACTOR 0.7
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(ptr_set)
+
+#if defined(PTR_SET_RH)
+#include "hash_table_impl_rh.h"
+#else
+#include "hash_table_impl.h"
+#endif
+
+static inline const void *ht_key(ht_item_t x) /* the key of an item is the item (pointer value) itself */
+{
+ return (const void *)x;
+}
+
+static inline size_t ht_key_len(ht_item_t x) /* fixed key length: the size of the item type, not of anything it points to */
+{
+ return sizeof(x);
+}
+
+static inline int ht_match(const void *key, size_t len, ht_item_t x) /* nonzero when key and item are the same pointer value */
+{
+ (void)len; /* unused: the pointer value alone is the key */
+ return (size_t)key == (size_t)x;
+}
+
+static inline size_t ptr_set_hash_function(const void *s, size_t len) /* hashes the pointer s itself; *s is never read here */
+{
+#if defined (PTR_SET_PTR_HASH)
+ /* Murmur hash like finalization step. */
+ return ht_ptr_hash_function(s, len);
+#elif defined (PTR_SET_INT_HASH)
+ /* Knuth's multiplicative hashing. */
+ return ht_int_hash_function(s, len);
+#else
+ (void)len; /* the default path hashes sizeof(char *) bytes regardless of len */
+ return ht_default_hash_function(&s, sizeof(char *)); /* &s: hash the bytes of the pointer variable */
+#endif
+}
diff --git a/external/hash/ptr_set.h b/external/hash/ptr_set.h
new file mode 100644
index 0000000..f66e70e
--- /dev/null
+++ b/external/hash/ptr_set.h
@@ -0,0 +1,19 @@
+#ifndef HT_PTR_SET_H
+#define HT_PTR_SET_H
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(ptr_set, void *)
+
+/* Return value helpers - these are specific to the implementation. x is parenthesized so any pointer expression can be passed. */
+#define PTR_SET_IS_MISSING(x) ((void *)(x) == (void *)0)
+#define PTR_SET_IS_ERROR(x) ((void *)(x) == (void *)2)
+#define PTR_SET_IS_VALID(x) ((void *)(x) > (void *)2)
+
+/* Extensions to std. interface: ptr_set_exists is nonzero when ptr_set_find_item(S, p) returns a non-null item. */
+static inline int ptr_set_exists(ptr_set_t *S, void *p)
+{
+ return ptr_set_find_item(S, p) != (void *)0;
+}
+
+#endif /* HT_PTR_SET_H */
diff --git a/external/hash/str_set.c b/external/hash/str_set.c
new file mode 100644
index 0000000..87a3766
--- /dev/null
+++ b/external/hash/str_set.c
@@ -0,0 +1,61 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "str_set.h"
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(str_set)
+#if defined(STR_SET_RH)
+#include "hash_table_impl_rh.h"
+#else
+#include "hash_table_impl.h"
+#endif
+
+/*
+ * Simple default implementation of a hash set. The stored items are
+ * zero-terminated strings. The hash table does not manage the
+ * allocation of the strings, like it doesn't manage any stored items.
+ * However, if items are created with, say, strndup, a destructor can be
+ * provided to free each item when clearing the table. The remove
+ * operation also returns the removed item so the caller can free it.
+ * In general, the key and the item are different, but here they are the
+ * same. Normally the key would be referenced by the item.
+ * ht_match: the first `len` bytes must compare equal (strncmp) and the
+ * item must not be longer than `len`, so a key never matches an item it only prefixes.
+ */
+static inline int ht_match(const void *key, size_t len, str_set_item_t item)
+{
+ return strncmp(key, item, len) == 0 && strlen(item) <= len;
+}
+
+static inline const void *ht_key(str_set_item_t item) /* the stored string is its own key */
+{
+ return (const void *)item;
+}
+
+static inline size_t ht_key_len(str_set_item_t item) /* key length excludes the terminating zero */
+{
+ return strlen(item);
+}
diff --git a/external/hash/str_set.h b/external/hash/str_set.h
new file mode 100644
index 0000000..df5d1c7
--- /dev/null
+++ b/external/hash/str_set.h
@@ -0,0 +1,32 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef STR_SET_H
+#define STR_SET_H
+
+#include "hash_table.h"
+
+DECLARE_HASH_TABLE(str_set, char *)
+
+#endif /* STR_SET_H */
diff --git a/external/hash/token_map.c b/external/hash/token_map.c
new file mode 100644
index 0000000..9bf85df
--- /dev/null
+++ b/external/hash/token_map.c
@@ -0,0 +1,54 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+/* These are just example settings. */
+
+#include "token_map.h"
+#define HT_LOAD_FACTOR 0.85
+/* Quadratic probing is ignored with Robin Hood hashing. */
+#define HT_PROBE_QUADRATIC
+#include "hash_table_def.h"
+DEFINE_HASH_TABLE(token_map)
+#if defined(TOKEN_MAP_RH)
+#include "hash_table_impl_rh.h"
+#else
+#include "hash_table_impl.h"
+#endif
+
+static inline const void *ht_key(ht_item_t item) /* key bytes are the item's token member */
+{
+ return item->token;
+}
+
+static inline size_t ht_key_len(ht_item_t item) /* key length is stored in the item; no strlen is performed */
+{
+ return item->len;
+}
+
+static inline int ht_match(const void *key, size_t len, ht_item_t item) /* exact match: equal length and equal bytes */
+{
+ return len == item->len && memcmp(key, item->token, len) == 0; /* length check first, so memcmp only runs on equal-length keys */
+}
diff --git a/external/hash/token_map.h b/external/hash/token_map.h
new file mode 100644
index 0000000..700c60e
--- /dev/null
+++ b/external/hash/token_map.h
@@ -0,0 +1,39 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef TOKEN_MAP_H
+#define TOKEN_MAP_H
+
+#include "hash_table.h"
+
+struct token {
+ char *token; /* key bytes; token_map.c compares them with memcmp over len bytes */
+ size_t len; /* key length in bytes, used as the hash table key length */
+ int type; /* not read by the map code; presumably a caller-defined token class - confirm with users */
+ void *data; /* not read by the map code; presumably a caller payload - confirm with users */
+};
+
+DECLARE_HASH_TABLE(token_map, struct token *)
+
+#endif /* TOKEN_MAP_H */
diff --git a/external/hash/unaligned.h b/external/hash/unaligned.h
new file mode 100644
index 0000000..0431f96
--- /dev/null
+++ b/external/hash/unaligned.h
@@ -0,0 +1,42 @@
+#ifndef UNALIGNED_H
+#define UNALIGNED_H
+
+/*
+ * This is a simplified version of portable/punaligned.h that does not depend on
+ * endian detection, but which assumes x86 is always little endian.
+ * Include the portable version for better precision.
+ */
+
+#ifndef unaligned_read_le16toh /* skip everything below when the macro is already defined */
+
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) /* x86 targets: load directly through a cast pointer */
+
+#define unaligned_read_le16toh(p) (*(uint16_t*)(p))
+#define unaligned_read_le32toh(p) (*(uint32_t*)(p))
+#define unaligned_read_le64toh(p) (*(uint64_t*)(p))
+
+#else /* all other targets: assemble the value from single bytes, least significant byte first */
+
+#define unaligned_read_le16toh(p) ( \
+ (((uint16_t)(((uint8_t *)(p))[0])) << 0) | \
+ (((uint16_t)(((uint8_t *)(p))[1])) << 8))
+
+#define unaligned_read_le32toh(p) ( \
+ (((uint32_t)(((uint8_t *)(p))[0])) << 0) | \
+ (((uint32_t)(((uint8_t *)(p))[1])) << 8) | \
+ (((uint32_t)(((uint8_t *)(p))[2])) << 16) | \
+ (((uint32_t)(((uint8_t *)(p))[3])) << 24))
+
+#define unaligned_read_le64toh(p) ( \
+ (((uint64_t)(((uint8_t *)(p))[0])) << 0) | \
+ (((uint64_t)(((uint8_t *)(p))[1])) << 8) | \
+ (((uint64_t)(((uint8_t *)(p))[2])) << 16) | \
+ (((uint64_t)(((uint8_t *)(p))[3])) << 24) | \
+ (((uint64_t)(((uint8_t *)(p))[4])) << 32) | \
+ (((uint64_t)(((uint8_t *)(p))[5])) << 40) | \
+ (((uint64_t)(((uint8_t *)(p))[6])) << 48) | \
+ (((uint64_t)(((uint8_t *)(p))[7])) << 56))
+#endif /* x86 direct load vs. byte-wise assembly */
+#endif /* unaligned_read_le16toh */
+
+#endif /* UNALIGNED_H */
diff --git a/external/lex/LICENSE b/external/lex/LICENSE
new file mode 100644
index 0000000..8e84a48
--- /dev/null
+++ b/external/lex/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/external/lex/README.md b/external/lex/README.md
new file mode 100644
index 0000000..3144091
--- /dev/null
+++ b/external/lex/README.md
@@ -0,0 +1,3 @@
+Essential files extracted from the luthor scanner - a generic scanner
+similar to a handwritten scanner, but covering many common cases by
+default.
diff --git a/external/lex/luthor.c b/external/lex/luthor.c
new file mode 100644
index 0000000..fc81985
--- /dev/null
+++ b/external/lex/luthor.c
@@ -0,0 +1,1509 @@
+/*
+ * Designed to be included in other C files which define emitter
+ * operations. The same source may thus be used to parse different
+ * grammars.
+ *
+ * The operators cover the most common operators in the C family. An
+ * operator does not have a name; it is represented by a long token code
+ * with up to 4 ASCII characters embedded literally. This avoids any
+ * semantic meaning at the lexer level. Emitter macros can redefine
+ * this behavior.
+ *
+ * No real harm is done in accepting a superset, but the source is
+ * intended to be modified, have things flagged or removed, other things
+ * added. The real complexity is in numbers, identifiers, and comments,
+ * which should be fairly complete with flagging as is.
+ *
+ * Keyword handling is done in macros, and described elsewhere, but for
+ * identifier compatible keywords, this is quite efficient to handle on
+ * a per language basis without modifying this source.
+ *
+ * The Lisp language family is somewhat different and not directly
+ * suited for this lexer, although it can easily be modified to suit.
+ * The main reason is ';' for comments, and operators used as part of
+ * the identifier symbol set, and no need for operator classification,
+ * and different handling of single character symbols.
+ *
+ * So overall, we more or less have one efficient unified lexer that can
+ * manage many languages - this is good, because it is a pain to write a
+ * new lexer by hand, and lexer tools are what they are.
+ */
+
+#include "luthor.h"
+
+#ifdef LEX_C99_NUMERIC /* umbrella flag: enables the three flags below */
+#define LEX_C_NUMERIC
+#define LEX_HEX_FLOAT_NUMERIC
+#define LEX_BINARY_NUMERIC
+#endif
+
+#ifdef LEX_C_NUMERIC /* set directly or via LEX_C99_NUMERIC above */
+#define LEX_C_OCTAL_NUMERIC
+#define LEX_HEX_NUMERIC
+#endif
+
+#ifdef LEX_JULIA_NUMERIC /* must be checked after the C flags so the conflict below is detected */
+#ifdef LEX_C_OCTAL_NUMERIC
+/*
+ * LEX_JULIA_OCTAL_NUMERIC and LEX_C_OCTAL_NUMERIC can technically
+ * coexist, but leading zeroes give C style leading zero numbers
+ * which can lead to incorrect values depending on expectations.
+ * Therefore the full LEX_JULIA_NUMERIC flag is designed to not allow this.
+ */
+#error "LEX_C_OCTAL_NUMERIC conflicts with LEX_JULIA_NUMERIC leading zero integers"
+#endif
+
+/*
+ * Julia v0.3 insists on lower case, and has a different meaning for
+ * upper case.
+ */
+#define LEX_LOWER_CASE_NUMERIC_PREFIX
+#define LEX_JULIA_OCTAL_NUMERIC
+#define LEX_HEX_FLOAT_NUMERIC
+#define LEX_BINARY_NUMERIC
+
+#endif /* LEX_JULIA_NUMERIC */
+
+#ifdef LEX_HEX_FLOAT_NUMERIC /* hex floats imply plain hex numbers; placed last so every path above is covered */
+#define LEX_HEX_NUMERIC
+#endif
+
+/*
+ * Numeric and string constants do not accept prefixes or suffixes such
+ * as u, l, L, U, ll, LL, f, or F in C, or various others in Julia
+ * strings. Use the parser to detect juxtaposition between identifier and
+ * constant. In Julia a numeric suffix means multiplication, in C it is
+ * a type qualifier. Signs, such as defined in JSON, are also not
+ * accepted - they must be operators. See source for various flags to
+ * enable different token types.
+ */
+
+/*
+ * Includes '_' in identifiers by default. Defines follow characters in
+ * identifiers but not the lead character - it must be defined in switch
+ * cases. If the identifier allows for dash '-', it is probably better
+ * to handle it as an operator and flag surrounding space in the parser.
+ */
+#ifndef lex_isalnum
+
+/*
+ * NOTE: isalnum, isalpha, are locale dependent. We only want to
+ * consider the ASCII-7 subset and treat everything else as utf-8.
+ * This table is not for leading identifier characters, as it contains 0..9.
+ *
+ * For more correct handling of UTF-8, see:
+ * https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Grammar+Lexicon
+ * based on Java Ident = NameStartChar NameChar*
+ *
+ * While the following is UTF-16, it can be adapted to UTF-8 easily.
+
+
+ fragment
+ NameChar
+ : NameStartChar
+ | '0'..'9'
+ | '_'
+ | '\u00B7'
+ | '\u0300'..'\u036F'
+ | '\u203F'..'\u2040'
+ ;
+ fragment
+ NameStartChar
+ : 'A'..'Z' | 'a'..'z'
+ | '\u00C0'..'\u00D6'
+ | '\u00D8'..'\u00F6'
+ | '\u00F8'..'\u02FF'
+ | '\u0370'..'\u037D'
+ | '\u037F'..'\u1FFF'
+ | '\u200C'..'\u200D'
+ | '\u2070'..'\u218F'
+ | '\u2C00'..'\u2FEF'
+ | '\u3001'..'\uD7FF'
+ | '\uF900'..'\uFDCF'
+ | '\uFDF0'..'\uFFFD'
+ ;
+ */
+
+static const char lex_alnum[256] = { /* 1 = byte may continue an identifier; 16 entries per row, first three rows are 0x00..0x2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x30: 0..9, then six excluded punctuation bytes */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+ /* 0x40: '@' (excluded), then A..O */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 0x50: P..Z, four excluded punctuation bytes, then '_' in the last slot */
+#ifdef LEX_ID_WITHOUT_UNDERSCORE
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+#else
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+#endif
+ /* 0x60: '`' (excluded), then a..o */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 0x70: p..z, then five excluded bytes up to 0x7f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+#ifdef LEX_ID_WITH_UTF8
+ /* 0x80..0xff: every non-ASCII byte is accepted, so utf-8 sequences pass unvalidated */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#else /* 0x80..0xff: every non-ASCII byte is rejected */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#endif
+};
+
+#define lex_isalnum(c) (lex_alnum[(unsigned char)(c)]) /* cast keeps the index in 0..255 even when char is signed */
+#endif
+
+#ifndef lex_isbindigit /* each classifier below is only defined when not already defined, so it can be overridden */
+#define lex_isbindigit(c) ((c) == '0' || (c) == '1')
+#endif
+
+#ifndef lex_isoctdigit /* one unsigned compare: values below '0' wrap to large numbers and fail the < 8 test */
+#define lex_isoctdigit(c) ((unsigned)((c) - '0') < 8)
+#endif
+
+#ifndef lex_isdigit /* ASCII 0..9 only, independent of locale */
+#define lex_isdigit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+#ifndef lex_ishexdigit /* OR-ing 0x20 folds 'A'..'F' onto 'a'..'f'; note that c is evaluated more than once */
+#define lex_ishexdigit(c) (((c) >= '0' && (c) <= '9') || (((c) | 0x20) >= 'a' && ((c) | 0x20) <= 'f'))
+#endif
+
+#ifndef lex_isctrl /* ASCII control bytes 0x00..0x1f and 0x7f only; the unsigned compare keeps bytes >= 0x80 (utf-8) out even when char is signed */
+#include <ctype.h>
+#define lex_isctrl(c) ((unsigned)(c) < 0x20 || (c) == 0x7f)
+#endif
+
+#ifndef lex_isblank /* horizontal whitespace only: space and tab */
+#define lex_isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#ifndef lex_iszterm /* a zero byte marks end of input unless disabled below */
+#define lex_iszterm(c) ((c) == '\0')
+#endif
+
+/*
+ * If ZTERM is disabled, zero will be a LEX_CTRL token
+ * and allowed to be embedded in comments and strings, or
+ * elsewhere, as long as the parser accepts the token.
+ */
+#ifdef LEX_DISABLE_ZTERM /* overrides any earlier lex_iszterm, including a user supplied one */
+#undef lex_iszterm
+#define lex_iszterm(c) (0)
+#endif
+
+/*
+ * The mode is normally LEX_MODE_NORMAL = 0 initially, or the returned
+ * mode from a previous call, unless LEX_MODE_INVALID = 1 was returned.
+ * If a buffer stopped in the middle of a string or a comment, the mode
+ * will reflect that. In all cases some amount of recovery is needed
+ * before starting a new buffer - see detailed comments in header file.
+ * If only a single buffer is used, special handling is still needed if
+ * the last line contains a single line comment because it will not be
+ * terminated, but it amounts to replace the emitted unterminated
+ * comment token with an end of comment token.
+ *
+ * Instead of 0, the mode can initially also be LEX_MODE_BOM - it will
+ * strip an optional BOM before moving to normal mode. Currently only
+ * UTF-8 BOM is supported, and this is unlikely to change.
+ *
+ * The context variable is user-defined and available to emitter macros.
+ * It may be null if unused.
+ *
+ */
+static int lex(const char *buf, size_t len, int mode, void *context)
+{
+ const char *p, *q, *s, *d;
+#if 0
+ /* TODO: old, remove this */
+ , *z, *f;
+#endif
+
+ p = buf; /* next char */
+ q = p + len; /* end of buffer */
+ s = p; /* start of token */
+ d = p; /* end of integer part */
+
+#if 0
+ /* TODO: old, remove this */
+
+ /* Used for float and leading zero detection in numerics. */
+ z = p;
+ f = p;
+#endif
+
+ /*
+ * Handle mid string and mid comment for reentering across
+ * buffer boundaries. Strip embedded counter from mode.
+ */
+ switch(mode & (LEX_MODE_COUNT_BASE - 1)) {
+
+ case LEX_MODE_NORMAL:
+ goto lex_mode_normal;
+
+ case LEX_MODE_BOM:
+ goto lex_mode_bom;
+
+#ifdef LEX_C_STRING
+ case LEX_MODE_C_STRING:
+ goto lex_mode_c_string;
+#endif
+#ifdef LEX_PYTHON_BLOCK_STRING
+ case LEX_MODE_PYTHON_BLOCK_STRING:
+ goto lex_mode_python_block_string;
+#endif
+#ifdef LEX_C_STRING_SQ
+ case LEX_MODE_C_STRING_SQ:
+ goto lex_mode_c_string_sq;
+#endif
+#ifdef LEX_PYTHON_BLOCK_STRING_SQ
+ case LEX_MODE_PYTHON_BLOCK_STRING_SQ:
+ goto lex_mode_python_block_string_sq;
+#endif
+#ifdef LEX_C_BLOCK_COMMENT
+ case LEX_MODE_C_BLOCK_COMMENT:
+ goto lex_mode_c_block_comment;
+#endif
+#if defined(LEX_SHELL_LINE_COMMENT) || defined(LEX_C99_LINE_COMMENT)
+ case LEX_MODE_LINE_COMMENT:
+ goto lex_mode_line_comment;
+#endif
+#ifdef LEX_JULIA_NESTED_COMMENT
+ case LEX_MODE_JULIA_NESTED_COMMENT:
+ goto lex_mode_julia_nested_comment;
+#endif
+
+ default:
+ /*
+ * This is mostly to kill unused label warning when comments
+ * are disabled.
+ */
+ goto lex_mode_exit;
+ }
+
+lex_mode_bom:
+
+ mode = LEX_MODE_BOM;
+
+ /*
+ * Special entry mode to consume utf-8 bom if present. We don't
+ * support other boms, but we would use the same token if we did.
+ *
+ * We generally expect no bom present, but it is here if needed
+ * without requiring ugly hacks elsewhere.
+ */
+ if (p + 3 < q && p[0] == '\xef' && p[1] == '\xbb' && p[2] == '\xbf') {
+ p += 3;
+ lex_emit_bom(s, p);
+ }
+ goto lex_mode_normal;
+
+/* If source is updated, also update LEX_C_STRING_SQ accordingly. */
+#ifdef LEX_C_STRING
+lex_mode_c_string:
+
+ mode = LEX_MODE_C_STRING;
+
+ for (;;) {
+ --p;
+ /* We do not allow blanks that are also control characters, such as \t. */
+ while (++p != q && *p != '\\' && *p != '\"' && !lex_isctrl(*p)) {
+ }
+ if (s != p) {
+ lex_emit_string_part(s, p);
+ s = p;
+ }
+ if (*p == '\"') {
+ ++p;
+ lex_emit_string_end(s, p);
+ goto lex_mode_normal;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\') {
+ ++p;
+ /* Escape is only itself, whatever is escaped follows separately. */
+ lex_emit_string_escape(s, p);
+ s = p;
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\' || *p == '\"') {
+ ++p;
+ continue;
+ }
+ /*
+ * Flag only relevant for single line strings, as it
+ * controls whether we fail on unterminated string at line
+ * ending with '\'.
+ *
+ * Julia does not support line continuation in strings
+ * (or elsewhere). C, Python, and Javascript do.
+ */
+#ifndef LEX_DISABLE_STRING_CONT
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+#endif
+ }
+ if (*p == '\n' || *p == '\r') {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ ++p;
+ lex_emit_string_ctrl(s);
+ s = p;
+ }
+#endif
+
+/*
+ * This is a copy of LEX_C_STRING with single quote. It's not DRY, but
+ * no reason to parameterize inner loops, just because. Changes made
+ * to the above must be copied here.
+ *
+ * Even if single quote is only used for CHAR types, it makes sense to
+ * parse as a full string since there can be all sorts of unicode
+ * escapes and line continuations, newlines to report and unexpected
+ * control characters to deal with.
+ */
+#ifdef LEX_C_STRING_SQ
+lex_mode_c_string_sq:
+
+ mode = LEX_MODE_C_STRING_SQ;
+
+ for (;;) {
+ --p;
+ while (++p != q && *p != '\\' && *p != '\'' && !lex_isctrl(*p)) {
+ }
+ if (s != p) {
+ lex_emit_string_part(s, p);
+ s = p;
+ }
+ if (*p == '\'') {
+ ++p;
+ lex_emit_string_end(s, p);
+ goto lex_mode_normal;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\') {
+ ++p;
+ /* Escape is only itself, whatever is escaped follows separately. */
+ lex_emit_string_escape(s, p);
+ s = p;
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\' || *p == '\'') {
+ ++p;
+ continue;
+ }
+ /*
+ * Flag only relevant for single line strings, as it
+ * controls whether we fail on unterminated string at line
+ * ending with '\'.
+ *
+ * Julia does not support line continuation in strings
+ * (or elsewhere). C, Python, and Javascript do.
+ */
+#ifndef LEX_DISABLE_STRING_CONT
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+#endif
+ }
+ if (*p == '\n' || *p == '\r') {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ ++p;
+ lex_emit_string_ctrl(s);
+ s = p;
+ }
+#endif
+
+/*
+ * """ Triple quoted Python block strings. """
+ * Single quoted version (''') is a direct copy, update both places
+ * if a change is needed.
+ *
+ * Note: there is no point in disabling line continuation
+ * for block strings, since it only affects unterminated
+ * string errors at newline. It all comes down to how
+ * escaped newline is interpreted by the parser.
+ */
+#ifdef LEX_PYTHON_BLOCK_STRING
+lex_mode_python_block_string:
+
+ mode = LEX_MODE_PYTHON_BLOCK_STRING;
+
+ for (;;) {
+ --p;
+ while (++p != q && *p != '\\' && !lex_isctrl(*p)) {
+ if (*p == '\"' && p + 2 < q && p[1] == '\"' && p[2] == '\"') {
+ break;
+ }
+ }
+ if (s != p) {
+ lex_emit_string_part(s, p);
+ s = p;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\"') {
+ p += 3;
+ lex_emit_string_end(s, p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\') {
+ /* Escape is only itself, allowing parser to interpret and validate. */
+ ++p;
+ lex_emit_string_escape(s, p);
+ s = p;
+ if (p + 1 != q && (*p == '\\' || *p == '\"')) {
+ ++p;
+ }
+ continue;
+ }
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ ++p;
+ lex_emit_string_ctrl(s);
+ s = p;
+ }
+#endif
+
+/*
+ * Python ''' style strings.
+ * Direct copy of """ quote version, update both if changed.
+ */
+#ifdef LEX_PYTHON_BLOCK_STRING_SQ
+lex_mode_python_block_string_sq:
+
+ mode = LEX_MODE_PYTHON_BLOCK_STRING_SQ;
+
+ for (;;) {
+ --p;
+ while (++p != q && *p != '\\' && !lex_isctrl(*p)) {
+ if (*p == '\'' && p + 2 < q && p[1] == '\'' && p[2] == '\'') {
+ break;
+ }
+ }
+ if (s != p) {
+ lex_emit_string_part(s, p);
+ s = p;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_string_unterminated(p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\'') {
+ p += 3;
+ lex_emit_string_end(s, p);
+ goto lex_mode_normal;
+ }
+ if (*p == '\\') {
+ /* Escape is only itself, allowing parser to interpret and validate. */
+ ++p;
+ lex_emit_string_escape(s, p);
+ s = p;
+ if (p + 1 != q && (*p == '\\' || *p == '\'')) {
+ ++p;
+ }
+ continue;
+ }
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_string_newline(s, p);
+ s = p;
+ continue;
+ }
+ ++p;
+ lex_emit_string_ctrl(s);
+ s = p;
+ }
+#endif
+
+/*
+ * We don't really care if it is a shell style comment or a C99,
+ * or any other line oriented comment, as the termination is
+ * the same.
+ */
+#if defined(LEX_SHELL_LINE_COMMENT) || defined(LEX_C99_LINE_COMMENT)
+lex_mode_line_comment:
+
+ mode = LEX_MODE_LINE_COMMENT;
+
+ for (;;) {
+ --p;
+ while (++p != q && (!lex_isctrl(*p))) {
+ }
+ if (s != p) {
+ lex_emit_comment_part(s, p);
+ s = p;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ /*
+ * Unterminated comment here is not necessarily true,
+ * not even likely, nor possible, but we do this to
+ * handle buffer switch consistently: any non-normal
+ * mode exit will have an unterminated token to fix up.
+ * Here it would be conversion to end of comment, which
+ * we cannot know yet, since the line might continue in
+ * the next buffer. This is a zero length token.
+ */
+ lex_emit_comment_unterminated(p);
+ goto lex_mode_exit;
+ }
+ if (*p == '\n' || *p == '\r') {
+ lex_emit_comment_end(s, p);
+ goto lex_mode_normal;
+ }
+ ++p;
+ lex_emit_comment_ctrl(s);
+ s = p;
+ }
+#endif
+
+#ifdef LEX_C_BLOCK_COMMENT
+lex_mode_c_block_comment:
+
+ mode = LEX_MODE_C_BLOCK_COMMENT;
+
+ for (;;) {
+ --p;
+ while (++p != q && (!lex_isctrl(*p))) {
+ if (*p == '/' && p[-1] == '*') {
+ --p;
+ break;
+ }
+ }
+ if (s != p) {
+ lex_emit_comment_part(s, p);
+ s = p;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_comment_unterminated(p);
+ goto lex_mode_exit;
+ }
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (lex_isctrl(*p)) {
+ ++p;
+ lex_emit_comment_ctrl(s);
+ s = p;
+ continue;
+ }
+ p += 2;
+ lex_emit_comment_end(s, p);
+ s = p;
+ goto lex_mode_normal;
+ }
+#endif
+
+ /* Julia nests block comments as #= ... #= ...=# ... =# across multiple lines. */
+#ifdef LEX_JULIA_NESTED_COMMENT
+lex_mode_julia_nested_comment:
+
+ /* Preserve nesting level on re-entrance. */
+ if ((mode & (LEX_MODE_COUNT_BASE - 1)) != LEX_MODE_JULIA_NESTED_COMMENT) {
+ mode = LEX_MODE_JULIA_NESTED_COMMENT;
+ }
+ /* We have already entered. */
+ mode += LEX_MODE_COUNT_BASE;
+
+ for (;;) {
+ --p;
+ while (++p != q && !lex_isctrl(*p)) {
+ if (*p == '#') {
+ if (p[-1] == '=') {
+ --p;
+ break;
+ }
+ if (p + 1 != q && p[1] == '=') {
+ break;
+ }
+ }
+ }
+ if (s != p) {
+ lex_emit_comment_part(s, p);
+ s = p;
+ }
+ if (p == q || lex_iszterm(*p)) {
+ lex_emit_comment_unterminated(p);
+ goto lex_mode_exit;
+ }
+ if (*p == '\n') {
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (*p == '\r') {
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ s = p;
+ continue;
+ }
+ if (lex_isctrl(*p)) {
+ ++p;
+ lex_emit_comment_ctrl(s);
+ s = p;
+ continue;
+ }
+ if (*p == '=') {
+ p += 2;
+ lex_emit_comment_end(s, p);
+ s = p;
+ mode -= LEX_MODE_COUNT_BASE;
+ if (mode / LEX_MODE_COUNT_BASE > 0) {
+ continue;
+ }
+ goto lex_mode_normal;
+ }
+ /* The upper bits are used as counter. */
+ mode += LEX_MODE_COUNT_BASE;
+ p += 2;
+ lex_emit_comment_begin(s, p, 0);
+ s = p;
+ if (mode / LEX_MODE_COUNT_BASE > LEX_MAX_NESTING_LEVELS) {
+ /* Prevent malicious input from overflowing counter. */
+ lex_emit_comment_deeply_nested(p);
+ lex_emit_abort(p);
+ return mode;
+ }
+ }
+#endif
+
+/* Unlike other modes, we can always jump here without updating token start `s` first. */
+lex_mode_normal:
+
+ mode = LEX_MODE_NORMAL;
+
+ while (p != q) {
+ s = p;
+
+ switch(*p) {
+
+#ifndef LEX_DISABLE_ZTERM
+ case '\0':
+ lex_emit_eos(s, p);
+ return mode;
+#endif
+
+ /* \v, \f etc. are covered by the CTRL token, don't put it here. */
+ case '\t': case ' ':
+ while (++p != q && lex_isblank(*p)) {
+ }
+ lex_emit_blank(s, p);
+ continue;
+
+ /*
+ * Newline should be emitted in all constructs, also comments
+ * and strings which have their own newline handling.
+ * Only one line is emitted at a time permitting simple line
+ * counting.
+ */
+ case '\n':
+ if (++p != q && *p == '\r') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ continue;
+
+ case '\r':
+ if (++p != q && *p == '\n') {
+ ++p;
+ }
+ lex_emit_newline(s, p);
+ continue;
+
+ /*
+ * C-style string, and Python style triple double quote
+ * delimited multi-line string. Prefix and suffix symbols
+ * should be parsed separately, e.g. L"hello" are two
+ * tokens.
+ */
+#if defined(LEX_C_STRING) || defined(LEX_PYTHON_BLOCK_STRING)
+ case '\"':
+#ifdef LEX_PYTHON_BLOCK_STRING
+ if (p + 2 < q && p[1] == '\"' && p[2] == '\"') {
+ p += 3;
+ lex_emit_string_begin(s, p);
+ s = p;
+ goto lex_mode_python_block_string;
+ }
+#endif
+#ifdef LEX_C_STRING
+ ++p;
+ lex_emit_string_begin(s, p);
+ s = p;
+ goto lex_mode_c_string;
+#endif
+#endif
+
+ /*
+ * Single quoted version of strings, otherwise identical
+ * behavior. Can also be used for char constants if checked
+ * by parser subsequently.
+ */
+#if defined(LEX_C_STRING_SQ) || defined(LEX_PYTHON_BLOCK_STRING_SQ)
+ case '\'':
+#ifdef LEX_PYTHON_BLOCK_STRING_SQ
+ if (p + 2 < q && p[1] == '\'' && p[2] == '\'') {
+ p += 3;
+ lex_emit_string_begin(s, p);
+ s = p;
+ goto lex_mode_python_block_string_sq;
+ }
+#endif
+#ifdef LEX_C_STRING_SQ
+ ++p;
+ lex_emit_string_begin(s, p);
+ s = p;
+ goto lex_mode_c_string_sq;
+#endif
+#endif
+
+#if defined(LEX_SHELL_LINE_COMMENT) || defined(LEX_JULIA_NESTED_COMMENT)
+ /*
+ * Line comment excluding terminal line break.
+ *
+ * See also C99 line comment `//`.
+ *
+ * Julia uses `#=` and `=#` for nested block comments.
+ * (According to Julia developers, `#=` is motivated by `=`
+ * not being likely to start anything that you would put a
+ * comment around, unlike `#{`, `}#` or `#(`, `)#`.)
+ *
+ * Some known doc comment formats are identified and
+ * included in the comment_begin token.
+ */
+ case '#':
+ ++p;
+#ifdef LEX_JULIA_NESTED_COMMENT
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_comment_begin(s, p, 0);
+ s = p;
+ goto lex_mode_julia_nested_comment;
+ }
+#endif
+ lex_emit_comment_begin(s, p, 0);
+ s = p;
+ goto lex_mode_line_comment;
+#endif
+
+ case '/':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+#ifdef LEX_C99_LINE_COMMENT
+ case '/':
+ ++p;
+ p += p != q && (*p == '/' || *p == '!');
+ lex_emit_comment_begin(s, p, (p - s == 3));
+ s = p;
+ goto lex_mode_line_comment;
+#endif
+#ifdef LEX_C_BLOCK_COMMENT
+ case '*':
+ ++p;
+ p += p != q && (*p == '*' || *p == '!');
+ lex_emit_comment_begin(s, p, (p - s == 3));
+ s = p;
+ goto lex_mode_c_block_comment;
+#endif
+ case '=':
+ ++p;
+ lex_emit_compound_op('/', '=', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('/', s, p);
+ continue;
+
+ case '(': case ')': case '[': case ']': case '{': case '}':
+ case ',': case ';': case '\\': case '?':
+ ++p;
+ lex_emit_op(*s, s, p);
+ continue;
+
+ case '%': case '!': case '~': case '^':
+ ++p;
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_compound_op(*s, '=', s, p);
+ continue;
+ }
+ lex_emit_op(*s, s, p);
+ continue;
+
+ case '|':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op('|', '=', s, p);
+ continue;
+ case '|':
+ ++p;
+ lex_emit_compound_op('|', '|', s, p);
+ break;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('|', s, p);
+ continue;
+
+ case '&':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op('&', '=', s, p);
+ continue;
+ case '&':
+ ++p;
+ lex_emit_compound_op('&', '&', s, p);
+ break;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('&', s, p);
+ continue;
+
+ case '=':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '>':
+ ++p;
+ lex_emit_compound_op('=', '>', s, p);
+ continue;
+ case '=':
+ ++p;
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_tricompound_op('=', '=', '=', s, p);
+ continue;
+ }
+ lex_emit_compound_op('=', '=', s, p);
+ break;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('=', s, p);
+ continue;
+
+ case ':':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op(':', '=', s, p);
+ continue;
+ case ':':
+ ++p;
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_tricompound_op(':', ':', '=', s, p);
+ continue;
+ }
+ lex_emit_compound_op(':', ':', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op(':', s, p);
+ continue;
+
+ case '*':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ lex_emit_compound_op('*', '=', s, p);
+ continue;
+ case '*':
+ /* **= hardly used anywhere? */
+ lex_emit_compound_op('*', '*', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('*', s, p);
+ continue;
+
+ case '<':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '-':
+ ++p;
+ lex_emit_compound_op('<', '-', s, p);
+ continue;
+ case '=':
+ ++p;
+ lex_emit_compound_op('<', '=', s, p);
+ continue;
+ case '<':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_tricompound_op('<', '<', '=', s, p);
+ continue;
+ case '<':
+ ++p;
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_quadcompound_op('<', '<', '<', '=', s, p);
+ continue;
+ }
+ lex_emit_tricompound_op('<', '<', '<', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_compound_op('<', '<', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('<', s, p);
+ continue;
+
+ case '>':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op('>', '=', s, p);
+ continue;
+ case '>':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_tricompound_op('>', '>', '=', s, p);
+ continue;
+ case '>':
+ ++p;
+ if (p != q && *p == '=') {
+ ++p;
+ lex_emit_quadcompound_op('>', '>', '>', '=', s, p);
+ continue;
+ }
+ lex_emit_tricompound_op('>', '>', '>', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_compound_op('>', '>', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('>', s, p);
+ continue;
+
+ case '-':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op('-', '=', s, p);
+ continue;
+ case '-':
+ ++p;
+ lex_emit_compound_op('-', '-', s, p);
+ continue;
+ case '>':
+ ++p;
+ lex_emit_compound_op('-', '>', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('-', s, p);
+ continue;
+
+ case '+':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '=':
+ ++p;
+ lex_emit_compound_op('+', '=', s, p);
+ continue;
+
+ case '+':
+ ++p;
+ lex_emit_compound_op('+', '+', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('+', s, p);
+ continue;
+
+ case '.':
+ ++p;
+ if (p != q) {
+ switch (*p) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ d = s;
+ goto lex_dot_to_fraction_part;
+ case '.':
+ ++p;
+ if (p != q && *p == '.') {
+ ++p;
+ lex_emit_tricompound_op('.', '.', '.', s, p);
+ continue;
+ }
+ lex_emit_compound_op('.', '.', s, p);
+ continue;
+ default:
+ break;
+ }
+ }
+ lex_emit_op('.', s, p);
+ continue;
+
+ case '0':
+ if (++p != q) {
+ switch (*p) {
+#ifdef LEX_C_OCTAL_NUMERIC
+
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ while (++p != q && lex_isoctdigit(*p)) {
+ }
+ d = p;
+ if (p != q) {
+ /*
+ * Leading zeroes like 00.10 are valid C
+ * floating point constants.
+ */
+ if (*p == '.') {
+ goto lex_c_octal_to_fraction_part;
+ }
+ if (*p == 'e' || *p == 'E') {
+ goto lex_c_octal_to_exponent_part;
+ }
+ }
+ lex_emit_octal(s, p);
+ /*
+ * If we have a number like 0079, it becomes
+ * 007(octal), 9(decimal). The parser should
+ * deal with this.
+ *
+ * To add to confusion i64 is a C integer suffix
+ * like in 007i64, but 2+2i is a Go complex
+ * constant. (Not specific to octals).
+ *
+ * This can all be handled by having the parser inspect
+ * following identifier or numeric, parser
+ * here meaning a lexer post processing step, not
+ * necessarily the parser itself.
+ */
+
+ continue;
+#else
+ /*
+ * All integers reach default and enter
+ * integer part. As a result, leading zeroes are
+ * mapped to floats and integers which matches
+ * Julia behavior. Other languages should decide
+ * if leading zero is valid or not. JSON
+ * disallows leading zero.
+ */
+#endif
+
+#ifdef LEX_JULIA_OCTAL_NUMERIC
+ /*
+ * This is the style of octal, not 100% Julia
+ * compatible. Also define Julia numeric to enforce
+ * lower case.
+ */
+#ifndef LEX_LOWER_CASE_NUMERIC_PREFIX
+ /* See also hex 0X. Julia v.0.3 uses lower case only here. */
+ case 'O':
+#endif
+ /*
+ * Julia accepts 0o700 as octal and 0b100 as
+ * binary, and 0xa00 as hex, and 0100 as
+ * integer, and 1e2 as 64 bit float and 1f2 as
+ * 32 bit float. Julia 0.3 does not support
+ * octal and binary fractions.
+ */
+ case 'o':
+ while (++p != q && lex_isoctdigit(*p)) {
+ }
+ lex_emit_octal(s, p);
+ /* Avoid hitting int fall through. */
+ continue;
+#endif
+#ifdef LEX_BINARY_NUMERIC
+ /* Binary in C++14. */
+ case 'b':
+#ifndef LEX_LOWER_CASE_NUMERIC_PREFIX
+ /* See also hex 0X. Julia v.0.3 uses lower case only here. */
+ case 'B':
+#endif
+ while (++p != q && lex_isbindigit(*p)) {
+ }
+ lex_emit_binary(s, p);
+ /* Avoid hitting int fall through. */
+ continue;
+#endif
+#ifdef LEX_HEX_NUMERIC
+ case 'x':
+#ifndef LEX_LOWER_CASE_NUMERIC_PREFIX
+ /*
+ * Julia v0.3 does not allow this, it thinks 0X1 is
+ * 0 * X1, X1 being an identifier.
+ * while 0x1 is a hex value due to precedence.
+ *
+ * TODO: This might change.
+ */
+
+ case 'X':
+#endif
+ while (++p != q && lex_ishexdigit(*p)) {
+ }
+#ifdef LEX_HEX_FLOAT_NUMERIC
+ /*
+ * Most hexadecimal floating point conversion
+ * functions, including Python's
+ * float.fromhex("0x1.0"), Julia's parse
+ * function, and C strtod on
+ * supporting platforms, will parse without
+ * exponent. The same languages do not support
+ * literal constants without the p exponent.
+ * First it is named p because e is a hex digit,
+ * second, the float suffix f is also a hex
+ * digit: 0x1.f is ambiguous in C without that
+ * rule. Conversions have no such ambiguity.
+ * In Julia, juxtaposition means that 0x1.f
+ * could mean 0x1p0 * f or 0x1.fp0.
+ *
+ * Since we are not doing conversion here but
+ * lexing a stream, we opt to require the p
+ * suffix because making it optional could end
+ * up consuming parts of the next token.
+ *
+ * But, we also make a flag to make the exponent
+ * optional, anyway. It could be used for better
+ * error reporting than just consuming the hex
+ * part since we likely should accept the ambiguous
+ * syntax either way.
+ */
+ d = p;
+ if (p != q && *p == '.') {
+ while (++p != q && lex_ishexdigit(*p)) {
+ }
+ }
+ if (p != q && (*p == 'p' || *p == 'P')) {
+ if (++p != q && *p != '+' && *p != '-') {
+ --p;
+ }
+ /* The exponent is a decimal power of 2. */
+ while (++p != q && lex_isdigit(*p)) {
+ }
+ lex_emit_hex_float(s, p);
+ continue;
+ }
+#ifdef LEX_HEX_FLOAT_OPTIONAL_EXPONENT
+ if (d != p) {
+ lex_emit_hex_float(s, p);
+ continue;
+ }
+#else
+ /*
+ * Backtrack to decimal point. We require p to
+ * be present because we could otherwise consume
+ * part of the next token.
+ */
+ p = d;
+#endif
+#endif /* LEX_HEX_FLOAT_NUMERIC */
+ lex_emit_hex(s, p);
+ continue;
+#endif /* LEX_HEX_NUMERIC */
+
+ default:
+ /*
+ * This means leading zeroes like 001 or 001.0 are
+ * treated like int and float respectively,
+ * iff C octals are flagged out. Otherwise they
+ * become 001(octal), and 001(octal),.0(float)
+ * which should be treated as an error because
+ * future extensions might allow octal floats.
+ * (Not likely, but interpretation is ambiguous).
+ */
+ break;
+ } /* Switch under '0' case. */
+
+ /*
+ * Pure single digit '0' is an octal number in the C
+ * spec. We have the option to treat it as an integer,
+ * or as an octal. For strict C behavior, this can be
+ * flagged in, but is disabled by default. It only
+ * applies to single digit 0. Thus, with C octal
+ * enabled, leading zeroes always go octal.
+ */
+ } /* If condition around switch under '0' case. */
+ --p;
+ goto lex_fallthrough_1; /* silence warning */
+
+ lex_fallthrough_1:
+ /* Leading integer digit in C integers. */
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ while (++p && lex_isdigit(*p)) {
+ }
+ d = p;
+ if (*p == '.') {
+/* Silence unused label warnings when features are disabled. */
+#ifdef LEX_C_OCTAL_NUMERIC
+lex_c_octal_to_fraction_part:
+#endif
+lex_dot_to_fraction_part:
+ while (++p != q && lex_isdigit(*p)) {
+ }
+ }
+ if (p != q && (*p == 'e' || *p == 'E')) {
+/* Silence unused label warnings when features are disabled. */
+#ifdef LEX_C_OCTAL_NUMERIC
+lex_c_octal_to_exponent_part:
+#endif
+ if (++p != q && *p != '+' && *p != '-') {
+ --p;
+ }
+ while (++p != q && lex_isdigit(*p)) {
+ }
+ }
+ if (d != p) {
+ lex_emit_float(s, p);
+ } else {
+#ifdef LEX_C_OCTAL_NUMERIC
+ if (*s == '0') {
+ lex_emit_octal(s, p);
+ continue;
+ }
+#endif
+ lex_emit_int(s, p);
+ }
+ continue;
+
+#ifndef LEX_ID_WITHOUT_UNDERSCORE
+ case '_':
+#endif
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+
+ /*
+ * We do not try to ensure utf-8 is terminated correctly nor
+ * that any unicode character above ASCII is a character
+ * suitable for identifiers.
+ *
+ * tag is calculated for keyword lookup, and we assume these
+ * are always ASCII-7bit. It has the form: length, first
+ * char, second char, last char in lsb to msb order. If the
+ * second char is missing, it becomes '\0'. The tag is not
+ * entirely unique, but suitable for fast lookup.
+ *
+ * If utf-8 appears in tag, the tag is undefined except the
+ * length is valid or overflows (meaning longer than any
+ * keyword and thus safe to compare against if tag matches).
+ *
+ * If the grammar is case insensitive, the tag can be
+ * downcased trivially by or'ring with 0x20202000 which
+ * preserves the length field (clever design by ASCII
+ * designers). After tag matching, a case insensitive
+ * compare is obviously also needed against the full lexeme.
+ */
+
+ {
+ unsigned long tag;
+
+ tag = (unsigned long)*p << 8;
+ if (++p != q && lex_isalnum(*p)) {
+ tag |= (unsigned long)*p << 16;
+ while (++p != q && lex_isalnum(*p)) {
+ }
+ }
+ tag |= (unsigned long)p[-1] << 24;
+ tag |= (unsigned char)(p - s) + (unsigned long)'0';
+ lex_emit_id(s, p, tag);
+ continue;
+ }
+
+ default:
+
+#ifdef LEX_ID_WITH_UTF8
+ /*
+ * Identifier again, in case it starts with a utf-8 lead
+ * character. This time we can ignore the tag, except the
+ * length char must be valid to avoid buffer overruns
+ * on potential kw check upstream.
+ */
+ if (*p & '\x80') {
+ unsigned long tag;
+
+ while (++p != q && lex_isalnum(*p)) {
+ }
+ tag = (unsigned char)(p - s) + '0';
+ lex_emit_id(s, p, tag);
+ continue;
+ }
+#endif
+ ++p;
+ /* normally 0x7f DEL and 0x00..0x1f incl. */
+ if (lex_isctrl(*s) && !lex_isblank(*s)) {
+ lex_emit_ctrl(s);
+ } else {
+ lex_emit_symbol(*s, s, p);
+ }
+ continue;
+ } /* Main switch in normal mode. */
+ } /* Main while loop in normal mode. */
+
+lex_mode_exit:
+ if (mode == LEX_MODE_INVALID) {
+ return mode;
+ }
+
+#ifndef LEX_DISABLE_ZTERM
+ if (p != q && lex_iszterm(*p)) {
+ lex_emit_eos(s, p);
+ return mode;
+ }
+#endif
+ lex_emit_eob(p);
+ return mode;
+}
+
diff --git a/external/lex/luthor.h b/external/lex/luthor.h
new file mode 100644
index 0000000..6ca373d
--- /dev/null
+++ b/external/lex/luthor.h
@@ -0,0 +1,472 @@
+/*
+ * Mostly generic lexer that can be hacked to suit specific syntax. See
+ * more detailed comments further down in this file.
+ *
+ * Normally include luthor.c instead of luthor.h so emitter functions
+ * can be custom defined, and optionally also fast keyword definitions.
+ *
+ * At the very minimum, define lex_emit which other emitters default to.
+ *
+ * Create a wrapper function to drive the lex function in said file.
+ *
+ * Use this header in separate parser logic to access the token values
+ * if relevant.
+ */
+
+#ifndef LUTHOR_H
+#define LUTHOR_H
+
+#ifdef LEX_KEYWORDS
+#include <string.h> /* memcmp for kw match */
+#endif
+
+#include "tokens.h"
+
+/* Fallback sink: unless the includer supplies lex_emit, tokens are dropped. */
+#if !defined(lex_emit)
+#define lex_emit(tok, begin, end) ((void)0)
+#endif
+
+/*
+ * Sink for comments, bom, and other things that are not necessarily
+ * of interest to the parser, but may be to buffer wrap handling,
+ * debugging, and pretty printers. Dropped unless overridden.
+ */
+#if !defined(lex_emit_other)
+#define lex_emit_other(tok, begin, end) ((void)0)
+#endif
+
+/* Zero-length markers: the lexeme starts and ends at `at`. */
+#if !defined(lex_emit_eof)
+#define lex_emit_eof(at) lex_emit(LEX_TOK_EOF, at, at)
+#endif
+
+#if !defined(lex_emit_abort)
+#define lex_emit_abort(at) lex_emit(LEX_TOK_ABORT, at, at)
+#endif
+
+#if !defined(lex_emit_eob)
+#define lex_emit_eob(at) lex_emit(LEX_TOK_EOB, at, at)
+#endif
+
+/* End of string (zero terminator) carries an explicit lexeme range. */
+#if !defined(lex_emit_eos)
+#define lex_emit_eos(begin, end) lex_emit(LEX_TOK_EOS, begin, end)
+#endif
+
+/* The byte order marker goes to the "other" sink, not to lex_emit. */
+#if !defined(lex_emit_bom)
+#define lex_emit_bom(begin, end) lex_emit_other(LEX_TOK_BOM, begin, end)
+#endif
+
+/*
+ * Identifiers. With LEX_KEYWORDS defined, the tag and the lexeme start
+ * are passed to lex_match_kw (generated by LEX_KW_TABLE_BEGIN ..
+ * LEX_KW_TABLE_END) and its return value becomes the token. Without it
+ * the tag is ignored and LEX_TOK_ID is emitted.
+ */
+#if defined(lex_emit_id)
+/* Keep the includer's definition. */
+#elif defined(LEX_KEYWORDS)
+#define lex_emit_id(begin, end, tag) lex_emit(lex_match_kw(tag, begin), begin, end)
+#else
+#define lex_emit_id(begin, end, tag) lex_emit(LEX_TOK_ID, begin, end)
+#endif
+
+/*
+ * This is a default for unknown symbols. It may be treated as an error,
+ * or it can be processed further by the parser instead of customizing
+ * the lexer. It ensures that there is always a token for every part of
+ * the input stream. The `token` argument (the symbol character) is not
+ * used by this default.
+ */
+#ifndef lex_emit_symbol
+#define lex_emit_symbol(token, first, last) lex_emit(LEX_TOK_SYMBOL, first, last)
+#endif
+
+/*
+ * Control characters 0x01 .. 0x1f, 0x7f(DEL), excluding \0\r\n\t which have
+ * separate tokens.
+ *
+ * Control characters in strings and comments are passed on as body
+ * elements, except \0\r\n which breaks the string up.
+ *
+ * Every control token is exactly one character long: [pos, pos + 1).
+ * `pos` is parenthesized so an expression argument keeps its meaning,
+ * but note that it is still expanded twice.
+ */
+#ifndef lex_emit_ctrl
+#define lex_emit_ctrl(pos) lex_emit(LEX_TOK_CTRL, (pos), (pos) + 1)
+#endif
+
+#ifndef lex_emit_string_ctrl
+#define lex_emit_string_ctrl(pos) lex_emit(LEX_TOK_STRING_CTRL, (pos), (pos) + 1)
+#endif
+
+#ifndef lex_emit_comment_ctrl
+#define lex_emit_comment_ctrl(pos) lex_emit_other(LEX_TOK_COMMENT_CTRL, (pos), (pos) + 1)
+#endif
+
+/*
+ * This enables user to both count lines, and to calculate character
+ * offset for subsequent lexemes. The newline lexeme covers the line
+ * break itself: it has length 2 for a \r\n or \n\r break, and 1
+ * otherwise, so the next line starts at `last`.
+ */
+#ifndef lex_emit_newline
+#define lex_emit_newline(first, last) lex_emit(LEX_TOK_NEWLINE, first, last)
+#endif
+
+#ifndef lex_emit_string_newline
+#define lex_emit_string_newline(first, last) lex_emit(LEX_TOK_STRING_NEWLINE, first, last)
+#endif
+
+/*
+ * Numeric literal emitters. Each one forwards the lexeme range
+ * [first, last) to lex_emit with the matching LEX_TOK_* token and can be
+ * overridden by defining the macro before this header is included.
+ */
+#ifndef lex_emit_int
+#define lex_emit_int(first, last) lex_emit(LEX_TOK_INT, first, last)
+#endif
+
+#ifndef lex_emit_float
+#define lex_emit_float(first, last) lex_emit(LEX_TOK_FLOAT, first, last)
+#endif
+
+#ifndef lex_emit_int_suffix
+#define lex_emit_int_suffix(first, last) lex_emit(LEX_TOK_INT_SUFFIX, first, last)
+#endif
+
+/* The macro name must match its guard, or the default is never provided. */
+#ifndef lex_emit_float_suffix
+#define lex_emit_float_suffix(first, last) lex_emit(LEX_TOK_FLOAT_SUFFIX, first, last)
+#endif
+
+/* Former misspelling of lex_emit_float_suffix, kept as an alias. */
+#ifndef lex_emit_floatint_suffix
+#define lex_emit_floatint_suffix(first, last) lex_emit_float_suffix(first, last)
+#endif
+
+#ifndef lex_emit_binary
+#define lex_emit_binary(first, last) lex_emit(LEX_TOK_BINARY, first, last)
+#endif
+
+#ifndef lex_emit_octal
+#define lex_emit_octal(first, last) lex_emit(LEX_TOK_OCTAL, first, last)
+#endif
+
+#ifndef lex_emit_hex
+#define lex_emit_hex(first, last) lex_emit(LEX_TOK_HEX, first, last)
+#endif
+
+#ifndef lex_emit_hex_float
+#define lex_emit_hex_float(first, last) lex_emit(LEX_TOK_HEX_FLOAT, first, last)
+#endif
+
+/*
+ * Comment emitters. They all go to the "other" sink. The comment begin
+ * token can be used to aid backtracking during buffer switch. The
+ * `is_doc` flag is accepted but not used by this default.
+ */
+#if !defined(lex_emit_comment_begin)
+#define lex_emit_comment_begin(begin, end, is_doc) \
+    lex_emit_other(LEX_TOK_COMMENT_BEGIN, begin, end)
+#endif
+
+#if !defined(lex_emit_comment_part)
+#define lex_emit_comment_part(begin, end) \
+    lex_emit_other(LEX_TOK_COMMENT_PART, begin, end)
+#endif
+
+#if !defined(lex_emit_comment_end)
+#define lex_emit_comment_end(begin, end) \
+    lex_emit_other(LEX_TOK_COMMENT_END, begin, end)
+#endif
+
+/* Zero-length comment markers located at `at`. */
+#if !defined(lex_emit_comment_unterminated)
+#define lex_emit_comment_unterminated(at) \
+    lex_emit_other(LEX_TOK_COMMENT_UNTERMINATED, at, at)
+#endif
+
+#if !defined(lex_emit_comment_deeply_nested)
+#define lex_emit_comment_deeply_nested(at) \
+    lex_emit_other(LEX_TOK_COMMENT_DEEPLY_NESTED, at, at)
+#endif
+
+/* String emitters. Unlike comments, these go to lex_emit. */
+#if !defined(lex_emit_string_begin)
+#define lex_emit_string_begin(begin, end) \
+    lex_emit(LEX_TOK_STRING_BEGIN, begin, end)
+#endif
+
+#if !defined(lex_emit_string_part)
+#define lex_emit_string_part(begin, end) \
+    lex_emit(LEX_TOK_STRING_PART, begin, end)
+#endif
+
+#if !defined(lex_emit_string_end)
+#define lex_emit_string_end(begin, end) \
+    lex_emit(LEX_TOK_STRING_END, begin, end)
+#endif
+
+#if !defined(lex_emit_string_escape)
+#define lex_emit_string_escape(begin, end) \
+    lex_emit(LEX_TOK_STRING_ESCAPE, begin, end)
+#endif
+
+/* Zero-length string marker located at `at`. */
+#if !defined(lex_emit_string_unterminated)
+#define lex_emit_string_unterminated(at) \
+    lex_emit(LEX_TOK_STRING_UNTERMINATED, at, at)
+#endif
+
+/* Runs of blanks go to the "other" sink. */
+#if !defined(lex_emit_blank)
+#define lex_emit_blank(begin, end) \
+    lex_emit_other(LEX_TOK_BLANK, begin, end)
+#endif
+
+/*
+ * Operators are emitted with a token value built from their characters:
+ * the first character in the low byte and each following character in
+ * the next higher byte, e.g. `<=` is '<' | ('=' << 8).
+ */
+#ifndef lex_emit_op
+#define lex_emit_op(op, first, last) lex_emit((long)(op), first, last)
+#endif
+
+#ifndef lex_emit_compound_op
+#define lex_emit_compound_op(op1, op2, first, last) \
+    lex_emit(((long)(op1) | ((long)(op2) << 8)), first, last)
+#endif
+
+/*
+ * The composed value is wrapped in one outer pair of parentheses, as in
+ * lex_emit_compound_op, so that a custom lex_emit always receives the
+ * token as a single operand.
+ */
+#ifndef lex_emit_tricompound_op
+#define lex_emit_tricompound_op(op1, op2, op3, first, last) \
+    lex_emit(((long)(op1) | ((long)(op2) << 8) | \
+        ((long)(op3) << 16)), first, last)
+#endif
+
+#ifndef lex_emit_quadcompound_op
+#define lex_emit_quadcompound_op(op1, op2, op3, op4, first, last) \
+    lex_emit(((long)(op1) | ((long)(op2) << 8) | \
+        ((long)(op3) << 16) | ((long)(op4) << 24)), first, last)
+#endif
+
+/* Used to limit number of nested comment level. */
+#ifndef LEX_MAX_NESTING_LEVELS
+#define LEX_MAX_NESTING_LEVELS 100
+#endif
+
+
+/* Keyword handling macros, see `keywords.c` for an example usage. */
+#ifdef LEX_KEYWORDS
+
+/*
+ * This implements a switch statement branching on the 4 character
+ * keyword tag (unsigned long value) which is produced by the lexers id
+ * recognizer. A final check is needed to ensure an exact match with a
+ * given id. Two keywords rarely conflict, but it is possible, and
+ * therefore kw_begin kw_match kw_match ... kw_end is used to cover
+ * this.
+ *
+ * See example usage elsewhere for details.
+ *
+ * The first element x0 is length '0'..'9' and ensures comparisons will
+ * not overrun the buffer where the lexeme is stored during string
+ * comparison, iff the keywords report the length correctly.
+ *
+ * The next elements in the tag are the first, second, and last
+ * character of lexeme / keyword, replacing second character with '\0'
+ * on single length keywords, so keyword 'e' is tagged '1', 'e', '\0', 'e',
+ * and 'while' is tagged '5' 'w', 'h', 'e', where the length is lsb
+ * and last character is msb.
+ *
+ * An enum with tok_kw_<name> elements is expected to provide return
+ * values on match. These should start at LEX_TOK_KW_BASE and are
+ * negative.
+ *
+ */
+#define lex_kw_begin(x0, x1, x2, x3) \
+    case \
+        ((unsigned long)(x0) | \
+        ((unsigned long)(x1) << 8) | \
+        ((unsigned long)(x2) << 16) | \
+        ((unsigned long)(x3) << 24)) :
+
+/*
+ * Compares only the keyword's own length; the length digit in the tag
+ * is what rules out longer lexemes. Expects `lexeme` to be in scope, as
+ * it is inside the function opened by LEX_KW_TABLE_BEGIN.
+ */
+#define lex_kw_match(kw) \
+    if (memcmp(#kw, lexeme, sizeof(#kw) - 1) == 0) \
+        return tok_kw_##kw;
+
+#define lex_kw_end() \
+    break;
+
+/* Shorthand for a tag that maps to exactly one keyword. */
+#define lex_kw(kw, x0, x1, x2, x3) \
+    lex_kw_begin(x0, x1, x2, x3) \
+        lex_kw_match(kw) \
+    lex_kw_end()
+
+static long lex_match_kw(unsigned long tag, const char *lexeme);
+
+/* Static so multiple grammars are possible in a single program. */
+#define LEX_KW_TABLE_BEGIN \
+static long lex_match_kw(unsigned long tag, const char *lexeme) \
+{ \
+    switch (tag) {
+
+#define LEX_KW_TABLE_END \
+    default: \
+        break; \
+    } \
+    return LEX_TOK_KW_NOT_FOUND; \
+}
+
+#else
+
+/*
+ * Allow flagging in and out without unused warning or missing macros.
+ * The table macros must use the same names as the enabled branch, or a
+ * keyword table will not compile when LEX_KEYWORDS is not defined.
+ */
+#define lex_kw_begin(x0, x1, x2, x3)
+#define lex_kw_match(kw)
+#define lex_kw_end()
+#define lex_kw(kw, x0, x1, x2, x3)
+#define LEX_KW_TABLE_BEGIN
+#define LEX_KW_TABLE_END
+/* Older spelling of the two stubs above, kept for existing users. */
+#define LEX_KEYWORD_TABLE_BEGIN
+#define LEX_KEYWORD_TABLE_END
+
+#endif /* LEX_KEYWORDS */
+
+
+
+/*
+ * Lexer modes: used for recovery when switching to a new buffer and for
+ * handling internal state changes for strings and comments. All modes
+ * fit below LEX_MODE_COUNT_BASE; the nested comment mode adds multiples
+ * of LEX_MODE_COUNT_BASE to carry its nesting level.
+ */
+enum {
+    /* Initial lexer state, always 0. */
+    LEX_MODE_NORMAL = 0,
+
+    /* Returned if lex is given unsupported mode. */
+    LEX_MODE_INVALID = 1,
+
+    /*
+     * Can be used in place of normal mode to consume optional bom
+     * marker at buffer start. Only utf-8 bom is supported.
+     */
+    LEX_MODE_BOM = 2,
+
+    /*
+     * Returned at end of buffer if mid string or mid comment, may also
+     * be larger for nested comments as nesting level is encoded.
+     */
+    LEX_MODE_C_STRING = 3,
+    LEX_MODE_C_STRING_SQ = 4,
+    LEX_MODE_PYTHON_BLOCK_STRING = 5,
+    LEX_MODE_PYTHON_BLOCK_STRING_SQ = 6,
+    LEX_MODE_C_BLOCK_COMMENT = 7,
+    LEX_MODE_LINE_COMMENT = 8,
+    LEX_MODE_JULIA_NESTED_COMMENT = 9,
+
+    /* Counter embedded in mode: one nesting level per multiple of this. */
+    LEX_MODE_COUNT_BASE = 16
+};
+
+
+
+/* ON CALLING AND USING LEX FUNCTION
+ *
+ * If a utf-8 BOM is possible, detect this before calling the lexer and
+ * advance the buffer. JSON explicitly disallows BOM, but recommends
+ * consuming it if present. If some other Unicode BOM is found, convert
+ * the buffer first. The lexer assumes ALL non-ascii characters are
+ * valid trailing identifier characters, which mostly works well. Strings
+ * with broken utf-8 are passed on as is. Identifiers that start with a
+ * utf-8 lead byte must be enabled with #define LEX_ID_WITH_UTF8
+ *
+ * If required, postprocess identifiers and strings for valid utf-8. It
+ * is assumed that all keywords are at most 9 characters long and always
+ * ASCII. Otherwise post process them in a hash table on identifier
+ * event. This enables a fast compiled trie lookup of keywords.
+ *
+ * Newline and control characters are always emitted, also inside
+ * strings and comments. The exception is \r, \n, \t, \0 which are
+ * handled specially, or if the lexer is adapted to handle certain
+ * control characters specially.
+ *
+ * Each token is not guaranteed correct, only to be delimited correctly,
+ * if it is indeed correct. Only very few tokens can be zero length, for
+ * example, the parser can rely on string part token not being empty
+ * which is important in dealing with line continuation. The end of
+ * buffer token is empty, and so is the unterminated string token, and
+ * also the comment end token for single line comments, but not the
+ * multi-line version. There is a token for every part of the input
+ * stream, but the parser can easily define some to be ignored and have
+ * them optimized out.
+ *
+ * Strings have start token, and optionally sequences of control,
+ * escape, and newline tokens, followed by either string end token or
+ * string unterminated token. String delimiters can be one
+ * (single-line) or three double quotes (multi-line, like Python);
+ * single quoted variants are only recognized when LEX_C_STRING_SQ or
+ * LEX_PYTHON_BLOCK_STRING_SQ is defined. Python, C and Javascript
+ * string continuation is handled by having the parser observing string
+ * escape followed by newline token. Escape is always a single
+ * character '\' token, and the parser is responsible for consuming the
+ * following content. If string syntax with double delimiter is used to
+ * define escaped delimiter, this will occur as two separate strings
+ * with no space between. The parser can handle this on its own; if, in
+ * such strings, '\"' does not mean escaped delimiter, the string will
+ * not terminate correctly, and the lexer must be adapted. Unterminated
+ * string may happen at end of buffer, also for single line strings.
+ * This is because the string might continue in a new buffer. The parser
+ * should deal with this.
+ *
+ * Comments always start with a start token, followed by zero or more
+ * comment part tokens interleaved with control and newline tokens,
+ * terminated by either comment end token, or unterminated comment
+ * token. If the comment is single, the unterminated comment token may
+ * appear at the last line instead of the expected end of comment token
+ * because the comment might continue in a new buffer. The parser
+ * should deal with this. Escapes and line continuations have no effects
+ * in comments, unlike strings.
+ *
+ * The lexer only carries one state variable: the mode. The mode can be
+ * normal (default and equals zero), or single or multi string or
+ * comment modes. These modes are used to recover after switching
+ * buffers as discussed below.
+ *
+ * The lexer can run to completion without involving the parser and
+ * could be used to pipeline tokens into another thread for concurrent
+ * parsing which is safe since the input buffer is considered read-only.
+ *
+ *
+ * KEYWORDS
+ *
+ * Keywords are treated as identifiers by default. By including a
+ * keyword table the `lex_emit_id` macro will check if the id is a
+ * keyword and translate the token if it is. Using the provided keyword
+ * table macros is just one way to do it. This is better explained by
+ * looking at an example. Keyword lookup based on the precomputed keyword
+ * tag provided to the lookup function are limited to 9 characters, but a
+ * custom lookup function need not use it and then the tag precomputation
+ * will be optimized out.
+ *
+ * Keywords are defined by the lookup function and should be negative
+ * starting at LEX_TOK_KW_BASE to avoid conflicts with other token types.
+ *
+ *
+ * WRAPPING MULTIPLE BUFFERS
+ *
+ * The user may need to deal with multiple buffers because data may
+ * arrive asynchronously over the network, and may have many concurrent
+ * lexing jobs. The emitter part is not difficult since a ring buffer
+ * can grow, or the parser can be called directly (except queuing a few
+ * tokens for backtracking as we shall see).
+ *
+ * If the lexer were an explicit state machine as in Flex, we could get
+ * a yywrap event to fill buffers, but our state is on the stack and in
+ * registers for optimization. We may use co-routines, but it doesn't
+ * cover all issues, and, as it turns out is not necessary with the
+ * following restrictions on syntax:
+ *
+ * All variable length tokens such as numerics and identifiers are
+ * limited in length. Strings and comments are not, but are broken into
+ * zero, one, or several body tokens per line. ANSI-C limits line length
+ * to 509 characters (allowing for continuation and two byte linebreaks
+ * in a 512 byte buffer). But JSON has no line continuation for strings
+ * and may (and often do) store everything on a single line. Whitespace
+ * can also extend beyond given limit.
+ *
+ * If we ignore whitespace, strings and comments, we can discard the
+ * last token (or last two in case there are paired tokens, such as
+ * leading zero followed by numeric). Parsing can then resume in a new
+ * buffer where the first 512 bytes (or similar) are duplicated from the
+ * previous buffer. The lexer is then restarted at the last token (pair)
+ * start which may turn out to change the length or even introduce a
+ * different result such as introducing leading zero. The lexer needs no
+ * specific state to do this.
+ *
+ * For strings and comments, we need a flag to allow entering the lexer
+ * mid string or mid comment. The newline and line continuation tokens
+ * need to be dropped, and the last body may need to be truncated as it
+ * can embed a partial delimiter. The simplest way to deal with this is
+ * to backtrack tokens until the last token begins at a safe position,
+ * about 3-6 characters earlier, and truncating body segments that span
+ * this barrier. Whitespace can also be truncated.
+ *
+ * We can generalize this further by going at least K bytes back in an N
+ * overlap buffer region and require non-strings (and non-comments) to
+ * not exceed N-K bytes, where K and N are specific to the syntax and
+ * the I/O topology.
+ *
+ * We can add flags to tokens that can help decide how to enter
+ * backtracking mode without covering every possible scanner loop - i.e.
+ * are we mid string, mid comment, single-line or multi-line.
+ *
+ * All the lexer needs to do then, is to receive the backtracking mode
+ * flags. A wrapping driver can deal with backtrack logic, which is
+ * specific to how tokens are emitted. Whitespace need no recovery mode
+ * but perhaps new whitespace should extend existing to simplify
+ * parsing.
+ */
+
+
+#endif /* LUTHOR_H */
+
diff --git a/external/lex/tokens.h b/external/lex/tokens.h
new file mode 100644
index 0000000..2bdbd7c
--- /dev/null
+++ b/external/lex/tokens.h
@@ -0,0 +1,554 @@
+#ifndef LEX_TOKENS_H
+#define LEX_TOKENS_H
+
+/* Define LEX_DEBUG to enable token printing and describing functions. */
+
+
+/*
+ * Token types emitted by the lexer. Values below LEX_TOK_OPERATOR_BASE
+ * are the fixed token types listed here, operator tokens use the ASCII
+ * range and above, and custom keyword tokens are negative by convention.
+ */
+enum {
+
+ /*
+ * EOF is not emitted by lexer, but may be used by driver after
+ * last buffer is processed.
+ */
+ LEX_TOK_EOF = 0,
+
+ /*
+ * Either EOB or EOS is emitted as the last token before exit,
+ * or also ABORT in some lexers. Unterminated string or comment
+ * will be emitted immediately before one of these when relevant.
+ *
+ * It may be useful to redefine lex_emit_eos and lex_emit_eob to
+ * produce LEX_TOK_EOF or error directly for simple string lexing.
+ */
+ LEX_TOK_EOB = 1,
+ LEX_TOK_EOS = 2,
+
+ /*
+ * ABORT can be used for early exit by some lexers while other
+ * lexers may choose to run to buffer end regardless of input (with
+ * the exception of deeply nested comments).
+ */
+ LEX_TOK_ABORT = 3,
+
+ /*
+ * Byte order marker. Only happens if lexer was started in bom mode
+ * and the input stream contains a leading bom marker.
+ * The token can only be the first token in the stream. Utf-8 is the
+ * only supported bom, but the lexeme may be checked in case other
+ * boms are added later. Normally it is routed to lex_emit_other
+ * along with comments so it just ignores the bom if present. It is
+ * generally recommended to consume utf-8 bom for interoperability,
+ * but also to not store it for the same reason.
+ */
+ LEX_TOK_BOM,
+
+ /*
+ * Any control character that is not newline or blank will be
+ * emitted as single character token here. This token is discussed
+ * in several comments below. For strings and comments, also
+ * blank control characters will be emitted since they are usually
+ * unexpected and not desired there.
+ */
+ LEX_TOK_CTRL,
+ LEX_TOK_STRING_CTRL,
+ LEX_TOK_COMMENT_CTRL,
+
+ /*
+ * Any printable ASCII character that is not otherwise consumed will
+ * be issued as a single length symbol token. Further discussion
+ * below. The symbol and CTRL tokens ensure that the entire input
+ * stream is covered by tokens. If utf-8 identifiers have not been
+ * flagged, utf-8 leading characters may also end up here, and so
+ * may utf-8 characters in general, that are not viewed as valid
+ * identifiers (depending on configuration).
+ */
+ LEX_TOK_SYMBOL,
+
+ /*
+ * Variable length identifier starting with (_A-Za-z) by default and
+ * followed by zero or more (_A-Za-z0-9) characters. (_) can be
+ * flagged out. utf-8 can be flagged in. By default any non-ASCII
+ * character (0x80 and above), is treated as part of an identifier
+ * for simplicity and speed, but this may be redefined. Any broken
+ * utf-8 is not sanitized, thus 0x80 would be a valid identifier
+ * token with utf-8 identifiers enabled, and otherwise it would be a
+ * symbol token.
+ *
+ * The ID does a magic trick: It maps the lexeme to a very simple
+ * and fast 32 bit hash code called a tag. The tag is emitted with
+ * the id token and can be used for fast keyword lookup. The
+ * hash tag is:
+ *
+ * (length)(first char)(second char)(last char)
+ *
+ * where length is ASCII '0' .. '9' where any length overflow is an
+ * arbitrary value, but such that the length is never longer than
+ * the lexeme. The last char is the last char regardless of length.
+ * For short identifiers, the second char may be the first char
+ * duplicated, and the last char may be first char.
+ *
+ * This code is very simple to write by hand: "5whe" means while,
+ * and can be used in a case switch before a strcmp with "while".
+ * Conflicts are possible, but then several keywords are tested like
+ * any other hash conflict. This keyword lookup is user driven, but
+ * can follow example code quite straightforwardly.
+ *
+ * The lex_emit_id macro can be implemented to provide the above
+ * lookup and inject a keyword token instead. By convention such
+ * tokens have negative values to avoid conflicts with lexer
+ * generated tokens.
+ *
+ * The ID also has a special role in prefixes and suffixes: C string
+ * literals like (L"hello") and numeric literals like (42f) are
+ * lexed as two tokens, one of which is an ID. The parser must
+ * process this and observe absence of whitespace where such syntax
+ * is relevant.
+ *
+ * While not specific to ID, the emitter macros can be designed to
+ * keep track of start of lines and end of whitespace and attach
+ * state flags to each token (at line start, after whitespace). The
+ * whitespace tokens can then be dropped. This might help parsing
+ * things like suffixes efficiently.
+ */
+ LEX_TOK_ID,
+
+ /*
+ * C-int ::= pos-dec-digit dec-digit*
+ * Julia-int ::= dec-digit+
+ *
+ * pos-dec-digit ::= '1'..'9'
+ * dec-digit ::= '0'..'9'
+ *
+ * Floating point numbers take precedence when possible so 00.10 is
+ * always a decimal floating point value when decimal floats are
+ * enabled.
+ *
+ * The C-int is automatically enabled if C-octals are enabled, and
+ * disabled otherwise. There is no specific Julia-int type - we just
+ * use the terminology to represent integers with leading zeroes.
+ *
+ * Julia style integers accept leading zeroes. C style integers with
+ * leading zeroes are consumed as C style octal numbers, so 0019 is
+ * parsed as either 0019(Julia-int), or 001(C-octal), 9(C-int).
+ *
+ * Single digit '0' maps to octal when C-octals are enabled and to
+ * Julia-int otherwise. (Yes, integers are not that simple, it
+ * seems).
+ *
+ * Both C and Julia octal numbers (see octal token) can be active
+ * simultaneously. This can be used to control leading zero
+ * behavior, even if C-octal numbers are not part of the grammar
+ * being parsed. For example, a language might use 0o777 octal
+ * numbers and disallow 0777 integers. Enabling C-octals makes this
+ * easy to detect (but should accept octal 0).
+ *
+ * There is no distinction between the styles in the int token, but
+ * leading zeroes are easily detected in the lexeme.
+ *
+ * Constant suffixes like 1L are treated as 1(INT), and L(ID). The
+ * same goes for other numeric values.
+ *
+ * Parser should check for leading zeroes and decide if it is valid,
+ * a warning, or an error (it is in JSON). This also goes for float.
+ *
+ * Numericals, not limited to INT, may appear shorter than they are
+ * due to buffer splits. Special recovery is required, but will only
+ * happen just before EOS or EOB tokens (i.e. buffer split events).
+ */
+ LEX_TOK_INT,
+
+ /*
+ * float ::= (int ['.' dec-digits*] dec-exponent)
+ * | ([int] '.' dec-digits* [dec-exponent])
+ * dec-exponent ::= ('e' | 'E') ['+' | '-'] dec-digits*
+ * dec-digits ::= '0'..'9'
+ * int ::= dec-digits*
+ *
+ * Consumes a superset of C float representation without suffix.
+ * Some invalid tokens such as 0.E are accepted. Valid tokens such
+ * as 00.10 take precedence over octal numbers even if it is a
+ * prefix, and the same is obviously true with respect to decimal
+ * integers.
+ *
+ * JSON does not allow leading zeroes, and also not leading '.'.
+ * This can easily be checked in the lexeme.
+ *
+ * The octal notation affecting integer leading zeroes is not
+ * relevant to floats because floats take precedence over octal and
+ * decimal int when containing '.', 'e' or 'E'.
+ */
+ LEX_TOK_FLOAT,
+
+ /*
+ * binary ::= (0b | 0B) ('0' | '1')*
+ *
+ * 0b100 or just 0b, parser must check that digits are present,
+ * otherwise it may be interpreted as zero, just like octal zero
+ * in C.
+ *
+ * Like 0X hex, 0B can be flagged out because Julia v0.3 does not
+ * support uppercase 0B.
+ */
+ LEX_TOK_BINARY,
+
+ /*
+ * C-octal ::= 0 octal-digit*
+ * octal-digit ::= '0'..'7'
+ *
+ * Julia-octal ::= 0o octal-digit*
+ * octal-digit ::= '0'..'7'
+ *
+ * 0777 for C style octal numbers, or 0o777 for Julia syntax. Julia
+ * v.0.3 does not allow uppercase 0O777, it would mean 0 * O777.
+ *
+ * When enabled, decimal floating points take precedence: 00.10 is
+ * parsed as 00.10(decimal float), as per C standard.
+ *
+ * NOTE: It is possible for both styles to be active simultaneously.
+ * This may be relevant in order to control handling of leading
+ * zeroes in decimal integers.
+ *
+ * If C-octal numbers are flagged out, leading zeroes are mapped to
+ * integers and the numerical value may change. Julia behaves this
+ * way. Nothing prevents support of both C and Julia octal numbers,
+ * but leading zeroes will then be interpreted the C way - it is not
+ * recommended to do this.
+ */
+ LEX_TOK_OCTAL,
+
+ /*
+ * hex ::= hex-int
+ * hex-digit ::= 'a'..'f' | 'A'..'F' | '0'..'9'
+ * hex-int ::= (0x | 0X) hex-digit*
+ *
+ * where hex digits are customizable (e.g. all lower case), and hex
+ * prefix 0x can be flagged to be lower case only (as in Julia).
+ *
+ * If hex floats are enabled, they take precedence:
+ * 0x1.0(hex-float), if not, 0x1.0 will parse as: 0x1(hex) followed
+ * by .0(decimal float).
+ *
+ * The lead prefix 0x may be flagged to be lower case only because
+ * this is required by Julia v0.3 where 0X means 0 * X. Julia
+ * accepts uppercase in the remaining hex digits (and exponent for
+ * floats). This could possibly change in future versions.
+ *
+ * The zero length sequence (0x | 0X) is accepted and left to the
+ * parser since the lexer emits a token for everything it sees.
+ * Conceptually it may be interpreted as zero, equivalent to 0 being
+ * both octal prefix and numeric 0 in C style octal representation.
+ * Or it may be an error.
+ */
+ LEX_TOK_HEX,
+
+ /*
+ * hex-float ::= hex-int ['.' hex-digit*] hex-exponent
+ * hex-exponent ::= ('p' | 'P') ['+' | '-'] decimal-digit*
+ * decimal-digit ::= '0'..'9'
+ *
+ * A superset of IEEE-754-2008 Hexadecimal Floating Point notation.
+ *
+ * We require the exponent to be present, but do not ensure the
+ * value is otherwise complete, e.g. 0x1p+ would be accepted. The p
+ * is needed because otherwise 0x1.f could be accepted, and f is a
+ * float suffix in C, and juxtaposition factor (0x1. * f) in Julia,
+ * at least, that is one possible interpretation.
+ *
+ * The exponent can be flagged optional in which case 0x1.f will be
+ * consumed as a single hex float token.
+ * This may either simply be accepted in some grammars, or used to
+ * provide an error message. If the exponent is required, 0x1.f will
+ * be lexed as three tokens:
+ *
+ * <'0x1'(hex int), '.'(op), 'f'(id)>.
+ *
+ * Thus it may be a good idea to allow the exponent to be optional
+ * anyway and issue an error message or warning if the p is absent
+ * later in the parsing stage.
+ *
+ * Note that, as per IEEE-754, the exponent is a decimal power of
+ * two. In other words, the number of bits to shift the
+ * (hexa)decimal point. Also note that it is p and not e because e
+ * is a hex digit.
+ */
+ LEX_TOK_HEX_FLOAT,
+
+ /*
+ * blank ::= ('\t' | '\x20')+
+ *
+ * Longest run in buffer holding only '\t' and '\x20' (space).
+ *
+ * Buffer splits may generate adjacent blanks depending on recovery
+ * processing. (The same goes for other line oriented runs such as
+ * string parts and comment parts).
+ */
+ LEX_TOK_BLANK,
+
+ /* newline ::= '\r' | '\n' | '\r\n' | '\n\r'
+ *
+ * Will always appear, also inside strings and comments. Can be used
+ * to track line starts and counts reliably as only one newline is
+ * issued at a time, and it is issued everywhere, also in strings
+ * and comments.
+ *
+ * May be preceded by string escape token inside strings. This can
+ * be interpreted as line continuation within strings specifically,
+ * as is the case in Python and Javascript (and in C via
+ * pre-processor).
+ *
+ * The LEX_TOK_STRING_NEWLINE is emitted inside strings so the ordinary
+ * newline may be ignored in comments and other non-string content.
+ */
+ LEX_TOK_NEWLINE,
+ LEX_TOK_STRING_NEWLINE,
+
+ /*
+ * string ::= string_start
+ * (string_part | string_escape |
+ * string_ctrl | string_newline)*
+ * (string_end | string_unterminated)
+ *
+ * There are several optional string styles. They all start with
+ * this token. The length and content provide details. Python
+ * may start with """ or ''' and this token will then have length
+ * 3 and three quotes as lexeme content. If the lexer exits before
+ * string end token, the returned lexer mode will remember the
+ * state and can be used for reentry - this also goes for comments.
+ *
+ * Strings can only contain part, escape, newline, and control
+ * tokens, and either string unterminated or string end token
+ * at last.
+ */
+ LEX_TOK_STRING_BEGIN,
+
+ /* Longest run without control characters, without (\), without
+ * newline, and without the relevant end delimiter. The run may be
+ * shortened due to buffer splits. The part may, as an exception,
+ * begin with an end delimiter character or a (\) if it was
+ * preceded by a string escape token. The escape character is
+ * always (\). Strings that use "" or '' as escape will be treated
+ * as start and end of separate strings. Strings that do not support
+ * (\) should just treat escape as a part of the string.
+ */
+ LEX_TOK_STRING_PART,
+
+ /*
+ * This is always a single character token (\) and only happens
+ * inside strings. See also string part token.
+ */
+ LEX_TOK_STRING_ESCAPE,
+
+ /* This token is similar to string start. It may be absent at buffer
+ * splits, but then an unterminated string token will be used
+ * just before the split event token.
+ */
+ LEX_TOK_STRING_END,
+
+ /*
+ * This is emitted before the buffer ends, or before unescaped
+ * newlines for line oriented string types (the usual strings).
+ * At buffer splits, recovery should clean it up. The returned
+ * mode allows parsing to continue in a new buffer with a slight
+ * content overlap.
+ *
+ * If string like ("hello, world!") in C, reaches end of line, it
+ * may be continued: ("hello, \)newline(world!"). If this line
+ * continuation is flagged out, this will lead to string
+ * unterminated, even if not at end of buffer. For block strings
+ * like """hello""", this only happens at end of buffer.
+ */
+ LEX_TOK_STRING_UNTERMINATED,
+
+ /*
+ *
+ * comment ::= comment_start
+ * (comment_part | ctrl | newline)*
+ * (comment_end | comment_unterminated)
+ *
+ *
+ * Comments work like strings in most respects. They emit parts, and
+ * control characters, but not escape characters, and cannot be
+ * continued at end of line. Block comments are like python block
+ * strings (''').
+ *
+ * Julia supports nested comments (#= ... #= =# =#). In this case
+ * a new start token can be emitted before an end token. If the
+ * parser exits due to buffer split, the mode has the nesting level
+ * encoded so it can be resumed in a new buffer.
+ *
+ * Line comments will have their end token just before newline, or
+ * unterminated comment just before buffer split token (EOB or EOS).
+ * (\) characters are consumed by the comment part tokens and do not
+ * affect the end of any comment.
+ *
+ * Comment begin may include extra characters when a doc comment is
+ * recognized. The emitter flags this. End comments are unaffected.
+ */
+ LEX_TOK_COMMENT_BEGIN,
+ LEX_TOK_COMMENT_PART,
+ LEX_TOK_COMMENT_END,
+ LEX_TOK_COMMENT_UNTERMINATED,
+
+ /*
+ * Issued before ABORT token if nesting level is above a predefined
+ * level. This is to protect against malicious and misguided
+ * content, otherwise the nesting level counter could wrap and
+ * generate a different interpretation, which could be bad. The
+ * parser would probably do similar things with nested tokens.
+ */
+ LEX_TOK_COMMENT_DEEPLY_NESTED,
+
+
+ /* Operators are all recognized single character symbols, or up to
+ * four characters. The token value is the ASCII codes shifted 8
+ * bits per extra character, by default, but the emitter macros
+ * can redefine this. Values below 32 are reserved token types as
+ * discussed above.
+ *
+ * What exactly represents an operator depends on what the lexer has
+ * enabled.
+ *
+ * Printable ASCII symbols that are NOT recognized, are emitted as
+ * SYMBOL tokens and are always length 1. The value can be derived
+ * from the lexeme, but not the token itself. This may be perfectly
+ * fine for the parser, or it may be used to indicate an error.
+ * There are no illegal characters per se.
+ *
+ * Non-printable ASCII characters that are not covered by newline or
+ * blank, are emitted as CTRL tokens. These act the same as the
+ * symbol token and may be used to indicate error, or to handle form
+ * feed and other whitespace not handled by default. Unlike symbol,
+ * however, CTRL also appears in strings and comments since they are
+ * generally not allowed and this makes it easy to capture (there is
+ * virtually no performance overhead in providing this service
+ * unless attempting to parse a binary format).
+ */
+
+ /* Don't bleed into this range. */
+ LEX_TOK_OPERATOR_BASE = 32,
+
+
+ /*
+ * Operators use ASCII range.
+ * Compound operators use range 0x80 to 0x7fff
+ * and possibly above for triple sequences.
+ * Custom keywords are normally negative but can be mapped
+ * to any other.
+ *
+ * The layout is designed for efficient table lookup.
+ * Compound operators might benefit from remapping down to a smaller
+ * range for compact lookup tables, but it depends on the parser.
+ */
+};
+
+/*
+ * Custom keyword token range is negative, and well below -99..0 where
+ * special codes are reserved.
+ *
+ * The macro may be defined before this point to choose another base.
+ * The default is parenthesized so the negative constant expands as a
+ * single operand inside any surrounding expression.
+ */
+#ifndef LEX_TOK_KW_BASE
+#define LEX_TOK_KW_BASE (-1000)
+#endif
+
+/*
+ * Defaults to the plain identifier token; presumably the value a keyword
+ * lookup yields when the identifier is not a keyword (confirm against the
+ * lookup code using it). May be overridden like LEX_TOK_KW_BASE.
+ */
+#ifndef LEX_TOK_KW_NOT_FOUND
+#define LEX_TOK_KW_NOT_FOUND LEX_TOK_ID
+#endif
+
+
+#ifdef LEX_DEBUG
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Returns a short description of a token value for debug output.
+ *
+ * token: a LEX_TOK_* value, an operator value, or a custom keyword value.
+ *
+ * The result is a string literal and must not be modified or freed.
+ * Values without a named LEX_TOK_* case are classified by range only:
+ * negative values are reported as keywords, unused values below
+ * LEX_TOK_OPERATOR_BASE as undefined, and larger values by how many
+ * bytes the value occupies. No check is made that an operator value is
+ * one the lexer actually produces.
+ */
+static const char *lex_describe_token(long token)
+{
+    switch (token) {
+    case LEX_TOK_BOM: return "BOM marker";
+    case LEX_TOK_EOF: return "EOF";
+    case LEX_TOK_EOS: return "buffer zero terminated";
+    case LEX_TOK_EOB: return "buffer exhausted";
+    case LEX_TOK_ABORT: return "abort";
+    case LEX_TOK_CTRL: return "control";
+    case LEX_TOK_STRING_CTRL: return "string control";
+    case LEX_TOK_COMMENT_CTRL: return "comment control";
+    case LEX_TOK_SYMBOL: return "symbol";
+    case LEX_TOK_ID: return "identifier";
+    case LEX_TOK_INT: return "integer";
+    case LEX_TOK_FLOAT: return "float";
+    case LEX_TOK_BINARY: return "binary";
+    case LEX_TOK_OCTAL: return "octal";
+    case LEX_TOK_HEX: return "hex";
+    case LEX_TOK_HEX_FLOAT: return "hex float";
+    case LEX_TOK_BLANK: return "blank";
+    case LEX_TOK_NEWLINE: return "newline";
+    case LEX_TOK_STRING_NEWLINE: return "string newline";
+    case LEX_TOK_STRING_BEGIN: return "string begin";
+    case LEX_TOK_STRING_PART: return "string part";
+    case LEX_TOK_STRING_END: return "string end";
+    case LEX_TOK_STRING_ESCAPE: return "string escape";
+    case LEX_TOK_STRING_UNTERMINATED: return "unterminated string";
+    case LEX_TOK_COMMENT_BEGIN: return "comment begin";
+    case LEX_TOK_COMMENT_PART: return "comment part";
+    case LEX_TOK_COMMENT_END: return "comment end";
+    case LEX_TOK_COMMENT_UNTERMINATED: return "unterminated comment";
+    case LEX_TOK_COMMENT_DEEPLY_NESTED: return "deeply nested comment";
+
+    default:
+        if (token < LEX_TOK_EOF) {
+            return "keyword";
+        }
+        if (token < LEX_TOK_OPERATOR_BASE) {
+            return "undefined";
+        }
+        if (token < 0x100L) {
+            return "operator";
+        }
+        if (token < 0x10000L) {
+            return "compound operator";
+        }
+        if (token < 0x1000000L) {
+            return "tricompound operator";
+        }
+        /*
+         * Four ASCII characters packed 8 bits each stay below
+         * 0x7f000000. The bound must fit in a 32 bit long, otherwise
+         * the comparison is always true on such platforms and the
+         * "reserved" result below can never be reached.
+         */
+        if (token < 0x7f000000L) {
+            return "quadcompound operator";
+        }
+        return "reserved";
+    }
+}
+
+/*
+ * Prints a one line description of a token to fp in the form
+ *
+ *     <line>:<pos> <description> (0x<token>): `<lexeme>`
+ *
+ * fp:        destination stream.
+ * token:     token value, described via lex_describe_token.
+ * first:     start of the lexeme.
+ * last:      end of the lexeme; last - first is the printed length.
+ * line, pos: position printed in front of the description.
+ *
+ * For EOS and CTRL tokens the lexeme is replaced by `^xx`, where xx is
+ * the hex value of the byte at first, so that byte is always read for
+ * these two token types.
+ */
+static void lex_fprint_token(FILE *fp,
+        long token,
+        const char *first, const char *last,
+        int line, int pos)
+{
+    char buf[10];
+    const char *lexeme = first;
+    int len = (int)(last - first);
+
+    switch (token) {
+    case LEX_TOK_EOS:
+    case LEX_TOK_CTRL:
+        /*
+         * Go through unsigned char so a byte of 0x80 or above prints
+         * as two hex digits rather than as a sign extended int on
+         * platforms where char is signed.
+         */
+        sprintf(buf, "^%02x", (unsigned int)(unsigned char)*first);
+        lexeme = buf;
+        len = (int)strlen(buf);
+        break;
+    default:
+        break;
+    }
+    /*
+     * A negative precision is treated by printf as if no precision was
+     * given, which would print past the lexeme up to the next zero byte.
+     */
+    if (len < 0) {
+        len = 0;
+    }
+    /* %lx takes an unsigned long; keyword tokens are negative. */
+    fprintf(fp, "%04d:%03d %s (0x%lx): `%.*s`\n",
+            line, pos, lex_describe_token(token), (unsigned long)token,
+            len, lexeme);
+}
+
+/* Shorthand for lex_fprint_token with stdout as the destination. */
+#define lex_print_token(tok, begin, end, ln, col)                       \
+    lex_fprint_token(stdout, tok, begin, end, ln, col)
+
+#else /* LEX_DEBUG */
+
+/*
+ * Stand-ins used when LEX_DEBUG is not defined: the describe macro
+ * yields a fixed string and the print macros expand to a no-op
+ * expression. The macro arguments are not evaluated.
+ */
+#define lex_describe_token(tok) "debug not available"
+#define lex_fprint_token(stream, tok, begin, end, ln, col) ((void)0)
+#define lex_print_token(tok, begin, end, ln, col) ((void)0)
+
+#endif /* LEX_DEBUG */
+
+
+#endif /* LEX_TOKENS_H */
+