2 files changed, 3039 insertions, 135 deletions
diff --git a/src/lib/third_party/include/roaring.h b/src/lib/third_party/include/roaring.h
index 500ba9cb9..4e356ef9a 100644
--- a/src/lib/third_party/include/roaring.h
+++ b/src/lib/third_party/include/roaring.h
@@ -1,5 +1,8 @@
+#ifdef USE_ROARING_V2
+#include "roaring_v2.h"
+#else
 // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
-// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+// Created by amalgamation.sh on 2024-03-20T03:56:45Z
 
 /*
  * The CRoaring project is under a dual license (Apache/MIT).
@@ -58,11 +61,11 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.6"
+#define ROARING_VERSION "3.0.0"
 enum { 
-    ROARING_VERSION_MAJOR = 0,
-    ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 6
+    ROARING_VERSION_MAJOR = 3,
+    ROARING_VERSION_MINOR = 0,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 
 /* end file include/roaring/roaring_version.h */
@@ -78,10 +81,11 @@ enum {
 #include <stdint.h>
 
 #ifdef __cplusplus
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
 #endif
 
-
 /**
  * When building .c files as C++, there's added compile-time checking if the
  * container types are derived from a `container_t` base class.  So long as
@@ -95,12 +99,12 @@ extern "C" { namespace roaring { namespace api {
  * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
  */
 #if defined(__cplusplus)
-    extern "C++" {
-      struct container_s {};
-    }
-    #define ROARING_CONTAINER_T ::roaring::api::container_s
+extern "C++" {
+struct container_s {};
+}
+#define ROARING_CONTAINER_T ::roaring::api::container_s
 #else
-    #define ROARING_CONTAINER_T void  // no compile-time checking
+#define ROARING_CONTAINER_T void  // no compile-time checking
 #endif
 
 #define ROARING_FLAG_COW UINT8_C(0x1)
@@ -125,15 +129,14 @@ typedef struct roaring_array_s {
     uint8_t flags;
 } roaring_array_t;
 
-
 typedef bool (*roaring_iterator)(uint32_t value, void *param);
 typedef bool (*roaring_iterator64)(uint64_t value, void *param);
 
 /**
-*  (For advanced users.)
-* The roaring_statistics_t can be used to collect detailed statistics about
-* the composition of a roaring bitmap.
-*/
+ *  (For advanced users.)
+ * The roaring_statistics_t can be used to collect detailed statistics about
+ * the composition of a roaring bitmap.
+ */
 typedef struct roaring_statistics_s {
     uint32_t n_containers; /* number of containers */
 
@@ -166,12 +169,912 @@ typedef struct roaring_statistics_s {
     // and n_values_arrays, n_values_rle, n_values_bitmap
 } roaring_statistics_t;
 
+/**
+ * Roaring-internal type used to iterate within a roaring container.
+ */
+typedef struct roaring_container_iterator_s {
+    // For bitset and array containers this is the index of the bit / entry.
+    // For run containers this points at the run.
+    int32_t index;
+} roaring_container_iterator_t;
+
 #ifdef __cplusplus
-} } }  // extern "C" { namespace roaring { namespace api {
+}
+}
+}  // extern "C" { namespace roaring { namespace api {
 #endif
 
 #endif /* ROARING_TYPES_H */
 /* end file include/roaring/roaring_types.h */
+/* begin file include/roaring/portability.h */
+/*
+ * portability.h
+ *
+ */
+
+/**
+ * All macros should be prefixed with either CROARING or ROARING.
+ * The library uses both ROARING_...
+ * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for
+ * macros that are provided by the build system or that are closely
+ * related to the format. The header macros may also use ROARING_.
+ * The CROARING_ prefix is for internal macros that a user is unlikely
+ * to ever interact with.
+ */
+
+#ifndef INCLUDE_PORTABILITY_H_
+#define INCLUDE_PORTABILITY_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif  // _GNU_SOURCE
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS 1
+#endif  // __STDC_FORMAT_MACROS
+
+#ifdef _MSC_VER
+#define CROARING_VISUAL_STUDIO 1
+/**
+ * We want to differentiate carefully between
+ * clang under visual studio and regular visual
+ * studio.
+ */
+#ifdef __clang__
+// clang under visual studio
+#define CROARING_CLANG_VISUAL_STUDIO 1
+#else
+// just regular visual studio (best guess)
+#define CROARING_REGULAR_VISUAL_STUDIO 1
+#endif  // __clang__
+#endif  // _MSC_VER
+#ifndef CROARING_VISUAL_STUDIO
+#define CROARING_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_CLANG_VISUAL_STUDIO
+#define CROARING_CLANG_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_REGULAR_VISUAL_STUDIO
+#define CROARING_REGULAR_VISUAL_STUDIO 0
+#endif
+
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#undef _POSIX_C_SOURCE
+#endif
+
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif  // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
+#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+#define _XOPEN_SOURCE 700
+#endif  // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+
+#ifdef __illumos__
+#define __EXTENSIONS__
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above
+#ifdef __GLIBC__
+#include <malloc.h>  // this should never be needed but there are some reports that it is needed.
+#endif
+
+#ifdef __cplusplus
+extern "C" {  // portability definitions are in global scope, not a namespace
+#endif
+
+#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
+#error This code assumes  64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#ifndef __restrict__
+#define __restrict__ __restrict
+#endif  // __restrict__
+#endif  // CROARING_REGULAR_VISUAL_STUDIO
+
+#if defined(__x86_64__) || defined(_M_X64)
+// we have an x64 processor
+#define CROARING_IS_X64 1
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910)
+// Old visual studio systems won't support AVX2 well.
+#undef CROARING_IS_X64
+#endif
+
+#if defined(__clang_major__) && (__clang_major__ <= 8) && !defined(__AVX2__)
+// Older versions of clang have a bug affecting us
+// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
+#undef CROARING_IS_X64
+#endif
+
+#ifdef ROARING_DISABLE_X64
+#undef CROARING_IS_X64
+#endif
+// we include the intrinsic header
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler */
+#include <x86intrin.h>  // on some recent GCC, this will declare posix_memalign
+
+#if CROARING_CLANG_VISUAL_STUDIO
+
+/**
+ * You are not supposed, normally, to include these
+ * headers directly. Instead you should either include intrin.h
+ * or x86intrin.h. However, when compiling with clang
+ * under Windows (i.e., when _MSC_VER is set), these headers
+ * only get included *if* the corresponding features are detected
+ * from macros:
+ * e.g., if __AVX2__ is set... in turn,  we normally set these
+ * macros by compiling against the corresponding architecture
+ * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
+ * software with these advanced instructions. These headers would
+ * normally guard against such usage, but we carefully included
+ * <x86intrin.h>  (or <intrin.h>) before, so the headers
+ * are fooled.
+ */
+// To avoid reordering imports:
+// clang-format off
+#include <bmiintrin.h>   // for _blsr_u64
+#include <lzcntintrin.h> // for  __lzcnt64
+#include <immintrin.h>   // for most things (AVX2, AVX512, _popcnt64)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <wmmintrin.h>
+#if _MSC_VER >= 1920
+// Important: we need the AVX-512 headers:
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512cdintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vlintrin.h>
+#include <avx512vbmiintrin.h>
+#include <avx512vbmi2intrin.h>
+#include <avx512vpopcntdqintrin.h>
+// clang-format on
+#endif  // _MSC_VER >= 1920
+// unfortunately, we may not get _blsr_u64, but, thankfully, clang
+// has it as a macro.
+#ifndef _blsr_u64
+// we roll our own
+#define _blsr_u64(n) ((n - 1) & n)
+#endif  //  _blsr_u64
+#endif  // SIMDJSON_CLANG_VISUAL_STUDIO
+
+#endif  // CROARING_REGULAR_VISUAL_STUDIO
+#endif  // defined(__x86_64__) || defined(_M_X64)
+
+#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
+#define CROARING_USENEON
+#endif
+#if defined(CROARING_USENEON)
+#include <arm_neon.h>
+#endif
+
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
+ * assembly */
+#define CROARING_INLINE_ASM 1
+#endif  // _MSC_VER
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+/* Microsoft C/C++-compatible compiler */
+#include <intrin.h>
+
+#ifndef __clang__  // if one compiles with MSVC *with* clang, then these
+                   // intrinsics are defined!!!
+#define CROARING_INTRINSICS 1
+// sadly there is no way to check whether we are missing these intrinsics
+// specifically.
+
+/* wrappers for Visual Studio built-ins that look like gcc built-ins
+ * __builtin_ctzll */
+/** result might be undefined when input_num is zero */
+static inline int roaring_trailing_zeroes(unsigned long long input_num) {
+    unsigned long index;
+#ifdef _WIN64  // highly recommended!!!
+    _BitScanForward64(&index, input_num);
+#else   // if we must support 32-bit Windows
+    if ((uint32_t)input_num != 0) {
+        _BitScanForward(&index, (uint32_t)input_num);
+    } else {
+        _BitScanForward(&index, (uint32_t)(input_num >> 32));
+        index += 32;
+    }
+#endif  // _WIN64
+    return index;
+}
+
+/* wrappers for Visual Studio built-ins that look like gcc built-ins
+ * __builtin_clzll */
+/** result might be undefined when input_num is zero */
+static inline int roaring_leading_zeroes(unsigned long long input_num) {
+    unsigned long index;
+#ifdef _WIN64  // highly recommended!!!
+    _BitScanReverse64(&index, input_num);
+#else   // if we must support 32-bit Windows
+    if (input_num > 0xFFFFFFFF) {
+        _BitScanReverse(&index, (uint32_t)(input_num >> 32));
+        index += 32;
+    } else {
+        _BitScanReverse(&index, (uint32_t)(input_num));
+    }
+#endif  // _WIN64
+    return 63 - index;
+}
+
+/* Use #define so this is effective even under /Ob0 (no inline) */
+#define roaring_unreachable __assume(0)
+#endif  // __clang__
+
+#endif  // CROARING_REGULAR_VISUAL_STUDIO
+
+#ifndef CROARING_INTRINSICS
+#define CROARING_INTRINSICS 1
+#define roaring_unreachable __builtin_unreachable()
+/** result might be undefined when input_num is zero */
+static inline int roaring_trailing_zeroes(unsigned long long input_num) {
+    return __builtin_ctzll(input_num);
+}
+/** result might be undefined when input_num is zero */
+static inline int roaring_leading_zeroes(unsigned long long input_num) {
+    return __builtin_clzll(input_num);
+}
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#define ALIGNED(x) __declspec(align(x))
+#elif defined(__GNUC__) || defined(__clang__)
+#define ALIGNED(x) __attribute__((aligned(x)))
+#else
+#warning "Warning. Unrecognized compiler."
+#define ALIGNED(x)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define CROARING_WARN_UNUSED __attribute__((warn_unused_result))
+#else
+#define CROARING_WARN_UNUSED
+#endif
+
+#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
+
+#ifdef CROARING_USENEON
+// we can always compute the popcount fast.
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && \
+    ((defined(_WIN64) || defined(_WIN32)) &&    \
+     defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+     CROARING_REGULAR_VISUAL_STUDIO)
+// we will need this function:
+static inline int roaring_hamming_backup(uint64_t x) {
+    uint64_t c1 = UINT64_C(0x5555555555555555);
+    uint64_t c2 = UINT64_C(0x3333333333333333);
+    uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F);
+    x -= (x >> 1) & c1;
+    x = ((x >> 2) & c2) + (x & c2);
+    x = (x + (x >> 4)) & c4;
+    x *= UINT64_C(0x0101010101010101);
+    return x >> 56;
+}
+#endif
+
+static inline int roaring_hamming(uint64_t x) {
+#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+    CROARING_REGULAR_VISUAL_STUDIO
+#ifdef CROARING_USENEON
+    return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
+#elif defined(_M_ARM64)
+    return roaring_hamming_backup(x);
+    // (int) _CountOneBits64(x); is unavailable
+#else   // _M_ARM64
+    return (int)__popcnt64(x);
+#endif  // _M_ARM64
+#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+    CROARING_REGULAR_VISUAL_STUDIO
+#ifdef _M_ARM
+    return roaring_hamming_backup(x);
+    // _CountOneBits is unavailable
+#else   // _M_ARM
+    return (int)__popcnt((unsigned int)x) +
+           (int)__popcnt((unsigned int)(x >> 32));
+#endif  // _M_ARM
+#else
+    return __builtin_popcountll(x);
+#endif
+}
+
+#ifndef UINT64_C
+#define UINT64_C(c) (c##ULL)
+#endif  // UINT64_C
+
+#ifndef UINT32_C
+#define UINT32_C(c) (c##UL)
+#endif  // UINT32_C
+
+#ifdef __cplusplus
+}  // extern "C" {
+#endif  // __cplusplus
+
+// this is almost standard?
+#undef STRINGIFY_IMPLEMENTATION_
+#undef STRINGIFY
+#define STRINGIFY_IMPLEMENTATION_(a) #a
+#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)
+
+// Our fast kernels require 64-bit systems.
+//
+// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
+// Furthermore, the number of SIMD registers is reduced.
+//
+// On 32-bit ARM, we would have smaller registers.
+//
+// The library should still have the fallback kernel. It is
+// slower, but it should run everywhere.
+
+//
+// Enable valid runtime implementations, and select
+// CROARING_BUILTIN_IMPLEMENTATION
+//
+
+// We are going to use runtime dispatch.
+#if CROARING_IS_X64
+#ifdef __clang__
+// clang does not have GCC push pop
+// warning: clang attribute push can't be used within a namespace in clang up
+// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be
+// *outside* of a namespace.
+#define CROARING_TARGET_REGION(T)                                      \
+    _Pragma(STRINGIFY(clang attribute push(__attribute__((target(T))), \
+                                           apply_to = function)))
+#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop")
+#elif defined(__GNUC__)
+// GCC is easier
+#define CROARING_TARGET_REGION(T) \
+    _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
+#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options")
+#endif  // clang then gcc
+
+#endif  // CROARING_IS_X64
+
+// Default target region macros don't do anything.
+#ifndef CROARING_TARGET_REGION
+#define CROARING_TARGET_REGION(T)
+#define CROARING_UNTARGET_REGION
+#endif
+
+#define CROARING_TARGET_AVX2 \
+    CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt")
+#define CROARING_TARGET_AVX512                                         \
+    CROARING_TARGET_REGION(                                            \
+        "avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw," \
+        "avx512vbmi2,avx512bitalg,avx512vpopcntdq")
+#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION
+#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION
+
+#ifdef __AVX2__
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX2
+#define CROARING_TARGET_AVX2
+#undef CROARING_UNTARGET_AVX2
+#define CROARING_UNTARGET_AVX2
+#endif
+
+#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && \
+    defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) &&                  \
+    defined(__AVX512VPOPCNTDQ__)
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX512
+#define CROARING_TARGET_AVX512
+#undef CROARING_UNTARGET_AVX512
+#define CROARING_UNTARGET_AVX512
+#endif
+
+// Allow unaligned memory access
+#if defined(__GNUC__) || defined(__clang__)
+#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
+#else
+#define ALLOW_UNALIGNED
+#endif
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+#define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(_WIN32)
+#define CROARING_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || \
+    defined(__FreeBSD__)  // defined __BYTE_ORDER__ && defined
+                          // __ORDER_BIG_ENDIAN__
+#include <machine/endian.h>
+#elif defined(sun) || \
+    defined(__sun)  // defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/byteorder.h>
+#else  // defined(__APPLE__) || defined(__FreeBSD__)
+
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif  //__has_include(<endian.h>)
+#endif  //__has_include
+
+#endif  // defined(__APPLE__) || defined(__FreeBSD__)
+
+#ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+#define CROARING_IS_BIG_ENDIAN 0
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 0
+#else  // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 1
+#endif  // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#endif
+
+// Host <-> big endian conversion.
+#if CROARING_IS_BIG_ENDIAN
+#define croaring_htobe64(x) (x)
+
+#elif defined(_WIN32) || defined(_WIN64)  // CROARING_IS_BIG_ENDIAN
+#include <stdlib.h>
+#define croaring_htobe64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)  // CROARING_IS_BIG_ENDIAN
+#include <libkern/OSByteOrder.h>
+#define croaring_htobe64(x) OSSwapInt64(x)
+
+#elif defined(__has_include) && \
+    __has_include(<byteswap.h>)  // CROARING_IS_BIG_ENDIAN
+#include <byteswap.h>
+#define croaring_htobe64(x) __bswap_64(x)
+
+#else  // CROARING_IS_BIG_ENDIAN
+// Gets compiled to bswap or equivalent on most compilers.
+#define croaring_htobe64(x)                                                    \
+    (((x & 0x00000000000000FFULL) << 56) |                                     \
+     ((x & 0x000000000000FF00ULL) << 40) |                                     \
+     ((x & 0x0000000000FF0000ULL) << 24) |                                     \
+     ((x & 0x00000000FF000000ULL) << 8) | ((x & 0x000000FF00000000ULL) >> 8) | \
+     ((x & 0x0000FF0000000000ULL) >> 24) |                                     \
+     ((x & 0x00FF000000000000ULL) >> 40) |                                     \
+     ((x & 0xFF00000000000000ULL) >> 56))
+#endif  // CROARING_IS_BIG_ENDIAN
+#define croaring_be64toh(x) croaring_htobe64(x)
+// End of host <-> big endian conversion.
+
+// Defines for the possible CROARING atomic implementations
+#define CROARING_ATOMIC_IMPL_NONE 1
+#define CROARING_ATOMIC_IMPL_CPP 2
+#define CROARING_ATOMIC_IMPL_C 3
+#define CROARING_ATOMIC_IMPL_C_WINDOWS 4
+
+// If the use has forced a specific implementation, use that, otherwise,
+// figure out the best implementation we can use.
+#if !defined(CROARING_ATOMIC_IMPL)
+#if defined(__cplusplus) && __cplusplus >= 201103L
+#ifdef __has_include
+#if __has_include(<atomic>)
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+#endif  //__has_include(<atomic>)
+#else
+   // We lack __has_include to check:
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+#endif  //__has_include
+#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C
+#elif CROARING_REGULAR_VISUAL_STUDIO
+   // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS
+#endif
+#endif  // !defined(CROARING_ATOMIC_IMPL)
+
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+#include <stdatomic.h>
+typedef _Atomic(uint32_t) croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    // Increasing the reference counter can always be done with
+    // memory_order_relaxed: New references to an object can only be formed from
+    // an existing reference, and passing an existing reference from one thread
+    // to another must already provide any required synchronization.
+    atomic_fetch_add_explicit(val, 1, memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // It is important to enforce any possible access to the object in one
+    // thread (through an existing reference) to happen before deleting the
+    // object in a different thread. This is achieved by a "release" operation
+    // after dropping a reference (any access to the object through this
+    // reference must obviously happened before), and an "acquire" operation
+    // before deleting the object.
+    bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1;
+    if (is_zero) {
+        atomic_thread_fence(memory_order_acquire);
+    }
+    return is_zero;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    return atomic_load_explicit(val, memory_order_relaxed);
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
+#include <atomic>
+typedef std::atomic<uint32_t> croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    val->fetch_add(1, std::memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // See above comments on the c11 atomic implementation for memory ordering
+    bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1;
+    if (is_zero) {
+        std::atomic_thread_fence(std::memory_order_acquire);
+    }
+    return is_zero;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    return val->load(std::memory_order_relaxed);
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
+#include <intrin.h>
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+
+// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and
+// overflow is defined to wrap, so we can pretend it is a uint32_t for our case
+typedef volatile long croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    _InterlockedIncrement(val);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    return _InterlockedDecrement(val) == 0;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    // Per
+    // https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
+    // > Simple reads and writes to properly-aligned 32-bit variables are atomic
+    // > operations. In other words, you will not end up with only one portion
+    // > of the variable updated; all bits are updated in an atomic fashion.
+    return *val;
+}
+//#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
+#else
+#include <assert.h>
+typedef uint32_t croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    *val += 1;
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    assert(*val > 0);
+    *val -= 1;
+    return val == 0;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    return *val;
+}
+//#else
+//#error "Unknown atomic implementation"
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define CROARING_DEPRECATED __attribute__((deprecated))
+#else
+#define CROARING_DEPRECATED
+#endif  // defined(__GNUC__) || defined(__clang__)
+
+// We need portability.h to be included first,
+// but we also always want isadetection.h to be
+// included (right after).
+// See https://github.com/RoaringBitmap/CRoaring/issues/394
+// There is no scenario where we want portability.h to
+// be included, but not isadetection.h: the latter is a
+// strict requirement.
+#endif                             /* INCLUDE_PORTABILITY_H_ */
+/* end file include/roaring/portability.h */
+/* begin file include/roaring/bitset/bitset.h */
+#ifndef CBITSET_BITSET_H
+#define CBITSET_BITSET_H
+
+// For compatibility with MSVC with the use of `restrict`
+#if (__STDC_VERSION__ >= 199901L) || \
+    (defined(__GNUC__) && defined(__STDC_VERSION__))
+#define CBITSET_RESTRICT restrict
+#else
+#define CBITSET_RESTRICT
+#endif  // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) &&
+        // defined(__STDC_VERSION__ ))
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace api {
+#endif
+
+struct bitset_s {
+    uint64_t *CBITSET_RESTRICT array;
+    /* For simplicity and performance, we prefer to have a size and a capacity
+     * that is a multiple of 64 bits. Thus we only track the size and the
+     * capacity in terms of 64-bit words allocated */
+    size_t arraysize;
+    size_t capacity;
+};
+
+typedef struct bitset_s bitset_t;
+
+/* Create a new bitset. Return NULL in case of failure. */
+bitset_t *bitset_create(void);
+
+/* Create a new bitset able to contain size bits. Return NULL in case of
+ * failure. */
+bitset_t *bitset_create_with_capacity(size_t size);
+
+/* Free memory. */
+void bitset_free(bitset_t *bitset);
+
+/* Set all bits to zero. */
+void bitset_clear(bitset_t *bitset);
+
+/* Set all bits to one. */
+void bitset_fill(bitset_t *bitset);
+
+/* Create a copy */
+bitset_t *bitset_copy(const bitset_t *bitset);
+
+/* For advanced users: Resize the bitset so that it can support newarraysize *
+ * 64 bits. Return true in case of success, false for failure. Pad with zeroes
+ * new buffer areas if requested. */
+bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes);
+
+/* returns how many bytes of memory the backend buffer uses */
+static inline size_t bitset_size_in_bytes(const bitset_t *bitset) {
+    return bitset->arraysize * sizeof(uint64_t);
+}
+
+/* returns how many bits can be accessed */
+static inline size_t bitset_size_in_bits(const bitset_t *bitset) {
+    return bitset->arraysize * 64;
+}
+
+/* returns how many words (64-bit) of memory the backend buffer uses */
+static inline size_t bitset_size_in_words(const bitset_t *bitset) {
+    return bitset->arraysize;
+}
+
+/* For advanced users: Grow the bitset so that it can support newarraysize * 64
+ * bits with padding. Return true in case of success, false for failure. */
+bool bitset_grow(bitset_t *bitset, size_t newarraysize);
+
+/* attempts to recover unused memory, return false in case of
+ * roaring_reallocation failure */
+bool bitset_trim(bitset_t *bitset);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1+s, 2+s, 10+s */
+void bitset_shift_left(bitset_t *bitset, size_t s);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */
+void bitset_shift_right(bitset_t *bitset, size_t s);
+
+/* Set the ith bit. Attempts to resize the bitset if needed (may silently fail)
+ */
+static inline void bitset_set(bitset_t *bitset, size_t i) {
+    size_t shiftedi = i / 64;
+    if (shiftedi >= bitset->arraysize) {
+        if (!bitset_grow(bitset, shiftedi + 1)) {
+            return;
+        }
+    }
+    bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64);
+}
+
+/* Set the ith bit to the specified value. Attempts to resize the bitset if
+ * needed (may silently fail) */
+static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
+    size_t shiftedi = i / 64;
+    uint64_t mask = ((uint64_t)1) << (i % 64);
+    uint64_t dynmask = ((uint64_t)flag) << (i % 64);
+    if (shiftedi >= bitset->arraysize) {
+        if (!bitset_grow(bitset, shiftedi + 1)) {
+            return;
+        }
+    }
+    uint64_t w = bitset->array[shiftedi];
+    w &= ~mask;
+    w |= dynmask;
+    bitset->array[shiftedi] = w;
+}
+
+/* Get the value of the ith bit.  */
+static inline bool bitset_get(const bitset_t *bitset, size_t i) {
+    size_t shiftedi = i / 64;
+    if (shiftedi >= bitset->arraysize) {
+        return false;
+    }
+    return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0;
+}
+
+/* Count number of bits set.  */
+size_t bitset_count(const bitset_t *bitset);
+
+/* Find the index of the first bit set. Or zero if the bitset is empty.  */
+size_t bitset_minimum(const bitset_t *bitset);
+
+/* Find the index of the last bit set. Or zero if the bitset is empty.  */
+size_t bitset_maximum(const bitset_t *bitset);
+
+/* compute the union in-place (to b1), returns true if successful, to generate a
+ * new bitset first call bitset_copy */
+bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the union (without materializing it) */
+size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the intersection in-place (to b1), to generate a new bitset first
+ * call bitset_copy */
+void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the intersection (without materializing it) */
+size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain no common elements */
+bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1,
+                      const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain any common elements */
+bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1,
+                       const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if b1 contains all of the set bits of b2 */
+bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1,
+                         const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the difference in-place (to b1), to generate a new bitset first call
+ * bitset_copy */
+void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the difference */
+size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the symmetric difference in-place (to b1), return true if successful,
+ * to generate a new bitset first call bitset_copy */
+bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the symmetric difference  */
+size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2);
+
+/* iterate over the set bits
+ like so :
+  for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) {
+    //.....
+  }
+  */
+static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
+    size_t x = *i / 64;
+    if (x >= bitset->arraysize) {
+        return false;
+    }
+    uint64_t w = bitset->array[x];
+    w >>= (*i & 63);
+    if (w != 0) {
+        *i += roaring_trailing_zeroes(w);
+        return true;
+    }
+    x++;
+    while (x < bitset->arraysize) {
+        w = bitset->array[x];
+        if (w != 0) {
+            *i = x * 64 + roaring_trailing_zeroes(w);
+            return true;
+        }
+        x++;
+    }
+    return false;
+}
+
+/* iterate over the set bits
+ like so :
+   size_t buffer[256];
+   size_t howmany = 0;
+  for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256,
+ &startfrom)) > 0 ; startfrom++) {
+    //.....
+  }
+  */
+static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                   size_t capacity, size_t *startfrom) {
+    if (capacity == 0) return 0;  // sanity check
+    size_t x = *startfrom / 64;
+    if (x >= bitset->arraysize) {
+        return 0;  // nothing more to iterate over
+    }
+    uint64_t w = bitset->array[x];
+    w >>= (*startfrom & 63);
+    size_t howmany = 0;
+    size_t base = x << 6;
+    while (howmany < capacity) {
+        while (w != 0) {
+            uint64_t t = w & (~w + 1);
+            int r = roaring_trailing_zeroes(w);
+            buffer[howmany++] = r + base;
+            if (howmany == capacity) goto end;
+            w ^= t;
+        }
+        x += 1;
+        if (x == bitset->arraysize) {
+            break;
+        }
+        base += 64;
+        w = bitset->array[x];
+    }
+end:
+    if (howmany > 0) {
+        *startfrom = buffer[howmany - 1];
+    }
+    return howmany;
+}
+
+typedef bool (*bitset_iterator)(size_t value, void *param);
+
+// return true if uninterrupted
+static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                            void *ptr) {
+  size_t base = 0, i;
+    for (i = 0; i < b->arraysize; ++i) {
+        uint64_t w = b->array[i];
+        while (w != 0) {
+            uint64_t t = w & (~w + 1);
+            int r = roaring_trailing_zeroes(w);
+            if (!iterator(r + base, ptr)) return false;
+            w ^= t;
+        }
+        base += 64;
+    }
+    return true;
+}
+
+static inline void bitset_print(const bitset_t *b) {
+    size_t i;
+    printf("{");
+    for (i = 0; bitset_next_set_bit(b, &i); i++) {
+        printf("%zu, ", i);
+    }
+    printf("}");
+}
+
+#ifdef __cplusplus
+}
+}
+}  // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif
+/* end file include/roaring/bitset/bitset.h */
 /* begin file include/roaring/roaring.h */
 /*
  * An implementation of Roaring Bitmaps in C.
@@ -181,12 +1084,16 @@ typedef struct roaring_statistics_s {
 #define ROARING_H
 
 #include <stdbool.h>
-#include <stdint.h>
 #include <stddef.h>  // for `size_t`
+#include <stdint.h>
+
 
+// Include other headers after roaring_types.h
 
 #ifdef __cplusplus
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
 #endif
 
 typedef struct roaring_bitmap_s {
@@ -206,8 +1113,9 @@ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
  * Returns NULL if the allocation fails.
  * Client is responsible for calling `roaring_bitmap_free()`.
  */
-static inline roaring_bitmap_t *roaring_bitmap_create(void)
-  { return roaring_bitmap_create_with_capacity(0); }
+static inline roaring_bitmap_t *roaring_bitmap_create(void) {
+    return roaring_bitmap_create_with_capacity(0);
+}
 
 /**
  * Initialize a roaring bitmap structure in memory controlled by client.
@@ -221,13 +1129,14 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
  * The bitmap will be in a "clear" state, with no auxiliary allocations.
  * Since this performs no allocations, the function will not fail.
  */
-static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
-  { roaring_bitmap_init_with_capacity(r, 0); }
+static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) {
+    roaring_bitmap_init_with_capacity(r, 0);
+}
 
 /**
  * Add all the values between min (included) and max (excluded) that are at a
  * distance k*step from min.
-*/
+ */
 roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
                                             uint32_t step);
 
@@ -245,11 +1154,10 @@ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
  * do so for all of your bitmaps, since interactions between bitmaps with and
  * without COW is unsafe.
  */
-static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
+static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) {
     return r->high_low_container.flags & ROARING_FLAG_COW;
 }
-static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
-                                                    bool cow) {
+static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) {
     if (cow) {
         r->high_low_container.flags |= ROARING_FLAG_COW;
     } else {
@@ -259,18 +1167,58 @@ static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
 
 roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
                                             int64_t offset);
-
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
 /**
  * Describe the inner structure of the bitmap.
  */
 void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
-#endif
 
 /**
  * Creates a new bitmap from a list of uint32_t integers
+ *
+ * This function is deprecated, use `roaring_bitmap_from` instead, which
+ * doesn't require the number of elements to be passed in.
+ *
+ * @see roaring_bitmap_from
  */
-roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+
+#ifdef __cplusplus
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring_bitmap_of_ptr` function instead.
+ */
+// Use an immediately invoked closure, capturing by reference
+// (in case __VA_ARGS__ refers to context outside the closure)
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring_bitmap_from(...)                                              \
+    [&]() {                                                                   \
+        const uint32_t roaring_bitmap_from_array[] = {0, __VA_ARGS__};        \
+        return roaring_bitmap_of_ptr((sizeof(roaring_bitmap_from_array) /     \
+                                      sizeof(roaring_bitmap_from_array[0])) - \
+                                         1,                                   \
+                                     &roaring_bitmap_from_array[1]);          \
+    }()
+#else
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring_bitmap_of_ptr` function instead.
+ */
+// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof
+// expression, which is an unevaluated context, so it's even safe in the case
+// where expressions passed have side effects (roaring64_bitmap_from(my_func(),
+// ++i))
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring_bitmap_from(...)                                             \
+    roaring_bitmap_of_ptr(                                                   \
+        (sizeof((const uint32_t[]){0, __VA_ARGS__}) / sizeof(uint32_t)) - 1, \
+        &((const uint32_t[]){0, __VA_ARGS__})[1])
+#endif
 
 /**
  * Copies a bitmap (this does memory allocation).
@@ -285,16 +1233,18 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
  *
  * It might be preferable and simpler to call roaring_bitmap_copy except
  * that roaring_bitmap_overwrite can save on memory allocations.
+ *
+ * Returns true if successful, or false if there was an error. On failure,
+ * the dest bitmap is left in a valid, empty state (even if it was not empty
+ * before).
  */
 bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
                               const roaring_bitmap_t *src);
 
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
 /**
  * Print the content of the bitmap.
  */
 void roaring_bitmap_printf(const roaring_bitmap_t *r);
-#endif
 
 /**
  * Computes the intersection between two bitmaps and returns new bitmap. The
@@ -321,10 +1271,10 @@ bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
                               const roaring_bitmap_t *r2);
 
 /**
- * Check whether a bitmap and a closed range intersect.
+ * Check whether a bitmap and an open range intersect.
  */
-bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
-                                         uint64_t x, uint64_t y);
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x,
+                                         uint64_t y);
 
 /**
  * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
@@ -502,15 +1452,15 @@ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
 /**
  * Add all values in range [min, max]
  */
-void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
-                                     uint32_t min, uint32_t max);
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min,
+                                     uint32_t max);
 
 /**
  * Add all values in range [min, max)
  */
-static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
-                                            uint64_t min, uint64_t max) {
-    if(max == min) return;
+static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min,
+                                     uint64_t max) {
+    if (max <= min) return;
     roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
 
@@ -522,15 +1472,15 @@ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
 /**
  * Remove all values in range [min, max]
  */
-void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
-                                        uint32_t min, uint32_t max);
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min,
+                                        uint32_t max);
 
 /**
  * Remove all values in range [min, max)
  */
-static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
-                                               uint64_t min, uint64_t max) {
-    if(max == min) return;
+static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min,
+                                        uint64_t max) {
+    if (max <= min) return;
     roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
 
@@ -556,12 +1506,11 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
  * to range_end (excluded) is present
  */
 bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
-                                   uint64_t range_start,
-                                   uint64_t range_end);
+                                   uint64_t range_start, uint64_t range_end);
 
 /**
- * Check if an items is present, using context from a previous insert for speed
- * optimization.
+ * Check if an items is present, using context from a previous insert or search
+ * for speed optimization.
  *
  * `context` will be used to store information between calls to make bulk
  * operations faster. `*context` should be zero-initialized before the first
@@ -591,11 +1540,10 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
                                           uint64_t range_end);
 
 /**
-* Returns true if the bitmap is empty (cardinality is zero).
-*/
+ * Returns true if the bitmap is empty (cardinality is zero).
+ */
 bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
 
-
 /**
  * Empties the bitmap.  It will have no auxiliary allocations (so if the bitmap
  * was initialized in client memory via roaring_bitmap_init(), then a call to
@@ -612,9 +1560,27 @@ void roaring_bitmap_clear(roaring_bitmap_t *r);
  */
 void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
 
+/**
+ * Store the bitmap to a bitset. This can be useful for people
+ * who need the performance and simplicity of a standard bitset.
+ * We assume that the input bitset is originally empty (does not
+ * have any set bit).
+ *
+ *   bitset_t * out = bitset_create();
+ *   // if the bitset has content in it, call "bitset_clear(out)"
+ *   bool success = roaring_bitmap_to_bitset(mybitmap, out);
+ *   // on failure, success will be false.
+ *   // You can then query the bitset:
+ *   bool is_present = bitset_get(out,  10011 );
+ *   // you must free the memory:
+ *   bitset_free(out);
+ *
+ */
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);
 
 /**
- * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
+ * Convert the bitmap to a sorted array from `offset` by `limit`, output in
+ * `ans`.
  *
  * Caller is responsible to ensure that there is enough memory allocated, e.g.
  *
@@ -622,9 +1588,8 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
  *
  * Return false in case of failure (e.g., insufficient memory)
  */
-bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
-                                       size_t offset, size_t limit,
-                                       uint32_t *ans);
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
+                                       size_t limit, uint32_t *ans);
 
 /**
  * Remove run-length encoding even when it is more space efficient.
@@ -657,8 +1622,9 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
  *
  * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -668,12 +1634,30 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
  * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
  * compatible with Java and Go implementations).
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
 
 /**
+ * Use with `roaring_bitmap_serialize()`.
+ *
+ * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ *
+ * The difference with `roaring_bitmap_deserialize()` is that this function
+ * checks that the input buffer is a valid bitmap.  If the buffer is too small,
+ * NULL is returned.
+ */
+roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
+                                                  size_t maxbytes);
+
+/**
  * How many bytes are required to serialize this bitmap (NOT compatible
  * with Java and Go versions)
  */
@@ -689,9 +1673,10 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
  *
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
-*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
 
@@ -702,8 +1687,23 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * The function itself is safe in the sense that it will not cause buffer
+ * overflows. However, for correct operations, it is assumed that the bitmap
+ * read was once serialized from a valid bitmap (i.e., it follows the format
+ * specification). If you provided an incorrect input (garbage), then the bitmap
+ * read may not be in a valid state and following operations may not lead to
+ * sensible results. In particular, the serialized array containers need to be
+ * in sorted order, and the run containers should be in sorted non-overlapping
+ * order. This is is guaranteed to happen when serializing an existing bitmap,
+ * but not for random inputs.
+ *
+ * You may use roaring_bitmap_internal_validate to check the validity of the
+ * bitmap prior to using it. You may also use other strategies to check for
+ * corrupted inputs (e.g., checksums).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
                                                            size_t maxbytes);
@@ -724,8 +1724,9 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
 
@@ -757,8 +1758,9 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -790,8 +1792,9 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
  * Serializes bitmap using frozen format.
  * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -806,8 +1809,9 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
  * Bitmap must be freed as usual, by calling roaring_bitmap_free().
  * Underlying buffer must not be freed or modified while it backs any bitmaps.
  *
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
  */
 const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
                                                    size_t length);
@@ -950,6 +1954,27 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
 uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
 
 /**
+ * roaring_bitmap_rank_many is an `Bulk` version of `roaring_bitmap_rank`
+ * it puts rank value of each element in `[begin .. end)` to `ans[]`
+ *
+ * the values in `[begin .. end)` must be sorted in Ascending order;
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ *     ans = malloc((end-begin) * sizeof(uint64_t));
+ */
+void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t *begin,
+                              const uint32_t *end, uint64_t *ans);
+
+/**
+ * Returns the index of x in the given roaring bitmap.
+ * If the roaring bitmap doesn't contain x , this function will return -1.
+ * The difference with rank function is that this function will return -1 when x
+ * is not the element of roaring bitmap, but the rank function will return a
+ * non-negative number.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x);
+
+/**
  * Returns the smallest value in the set, or UINT32_MAX if the set is empty.
  */
 uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
@@ -968,136 +1993,215 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
 void roaring_bitmap_statistics(const roaring_bitmap_t *r,
                                roaring_statistics_t *stat);
 
+/**
+ * Perform internal consistency checks. Returns true if the bitmap is
+ * consistent. It may be useful to call this after deserializing bitmaps from
+ * untrusted sources. If roaring_bitmap_internal_validate returns true, then the
+ * bitmap should be consistent and can be trusted not to cause crashes or memory
+ * corruption.
+ *
+ * Note that some operations intentionally leave bitmaps in an inconsistent
+ * state temporarily, for example, `roaring_bitmap_lazy_*` functions, until
+ * `roaring_bitmap_repair_after_lazy` is called.
+ *
+ * If reason is non-null, it will be set to a string describing the first
+ * inconsistency found if any.
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r,
+                                      const char **reason);
+
 /*********************
 * What follows is code use to iterate through values in a roaring bitmap
 
 roaring_bitmap_t *r =...
 roaring_uint32_iterator_t i;
-roaring_create_iterator(r, &i);
+roaring_iterator_create(r, &i);
 while(i.has_value) {
   printf("value = %d\n", i.current_value);
-  roaring_advance_uint32_iterator(&i);
+  roaring_uint32_iterator_advance(&i);
 }
 
 Obviously, if you modify the underlying bitmap, the iterator
 becomes invalid. So don't.
 */
 
+/**
+ * A struct used to keep iterator state. Users should only access
+ * `current_value` and `has_value`, the rest of the type should be treated as
+ * opaque.
+ */
 typedef struct roaring_uint32_iterator_s {
-    const roaring_bitmap_t *parent;  // owner
-    int32_t container_index;         // point to the current container index
-    int32_t in_container_index;  // for bitset and array container, this is out
-                                 // index
-    int32_t run_index;           // for run container, this points  at the run
+    const roaring_bitmap_t *parent;        // Owner
+    const ROARING_CONTAINER_T *container;  // Current container
+    uint8_t typecode;                      // Typecode of current container
+    int32_t container_index;               // Current container index
+    uint32_t highbits;                     // High 16 bits of the current value
+    roaring_container_iterator_t container_it;
 
     uint32_t current_value;
     bool has_value;
-
-    const ROARING_CONTAINER_T
-        *container;  // should be:
-                     // parent->high_low_container.containers[container_index];
-    uint8_t typecode;  // should be:
-                       // parent->high_low_container.typecodes[container_index];
-    uint32_t highbits;  // should be:
-                        // parent->high_low_container.keys[container_index]) <<
-                        // 16;
-
 } roaring_uint32_iterator_t;
 
 /**
- * Initialize an iterator object that can be used to iterate through the
- * values. If there is a  value, then this iterator points to the first value
- * and `it->has_value` is true. The value is in `it->current_value`.
+ * Initialize an iterator object that can be used to iterate through the values.
+ * If there is a  value, then this iterator points to the first value and
+ * `it->has_value` is true. The value is in `it->current_value`.
  */
-void roaring_init_iterator(const roaring_bitmap_t *r,
+void roaring_iterator_init(const roaring_bitmap_t *r,
                            roaring_uint32_iterator_t *newit);
 
+/** DEPRECATED, use `roaring_iterator_init`. */
+CROARING_DEPRECATED static inline void roaring_init_iterator(
+    const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
+    roaring_iterator_init(r, newit);
+}
+
 /**
- * Initialize an iterator object that can be used to iterate through the
- * values. If there is a value, then this iterator points to the last value
- * and `it->has_value` is true. The value is in `it->current_value`.
+ * Initialize an iterator object that can be used to iterate through the values.
+ * If there is a value, then this iterator points to the last value and
+ * `it->has_value` is true. The value is in `it->current_value`.
  */
-void roaring_init_iterator_last(const roaring_bitmap_t *r,
+void roaring_iterator_init_last(const roaring_bitmap_t *r,
                                 roaring_uint32_iterator_t *newit);
 
+/** DEPRECATED, use `roaring_iterator_init_last`. */
+CROARING_DEPRECATED static inline void roaring_init_iterator_last(
+    const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
+    roaring_iterator_init_last(r, newit);
+}
+
 /**
  * Create an iterator object that can be used to iterate through the values.
  * Caller is responsible for calling `roaring_free_iterator()`.
  *
- * The iterator is initialized (this function calls `roaring_init_iterator()`)
+ * The iterator is initialized (this function calls `roaring_iterator_init()`)
  * If there is a value, then this iterator points to the first value and
  * `it->has_value` is true.  The value is in `it->current_value`.
  */
-roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
+roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r);
+
+/** DEPRECATED, use `roaring_iterator_create`. */
+CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
+roaring_create_iterator(const roaring_bitmap_t *r) {
+    return roaring_iterator_create(r);
+}
 
 /**
-* Advance the iterator. If there is a new value, then `it->has_value` is true.
-* The new value is in `it->current_value`. Values are traversed in increasing
-* orders. For convenience, returns `it->has_value`.
-*/
-bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
+ * Advance the iterator. If there is a new value, then `it->has_value` is true.
+ * The new value is in `it->current_value`. Values are traversed in increasing
+ * orders. For convenience, returns `it->has_value`.
+ *
+ * Once `it->has_value` is false, `roaring_uint32_iterator_advance` should not
+ * be called on the iterator again. Calling `roaring_uint32_iterator_previous`
+ * is allowed.
+ */
+bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_advance`. */
+CROARING_DEPRECATED static inline bool roaring_advance_uint32_iterator(
+    roaring_uint32_iterator_t *it) {
+    return roaring_uint32_iterator_advance(it);
+}
 
 /**
-* Decrement the iterator. If there's a new value, then `it->has_value` is true.
-* The new value is in `it->current_value`. Values are traversed in decreasing
-* order. For convenience, returns `it->has_value`.
-*/
-bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
+ * Decrement the iterator. If there's a new value, then `it->has_value` is true.
+ * The new value is in `it->current_value`. Values are traversed in decreasing
+ * order. For convenience, returns `it->has_value`.
+ *
+ * Once `it->has_value` is false, `roaring_uint32_iterator_previous` should not
+ * be called on the iterator again. Calling `roaring_uint32_iterator_advance` is
+ * allowed.
+ */
+bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_previous`. */
+CROARING_DEPRECATED static inline bool roaring_previous_uint32_iterator(
+    roaring_uint32_iterator_t *it) {
+    return roaring_uint32_iterator_previous(it);
+}
 
 /**
  * Move the iterator to the first value >= `val`. If there is a such a value,
  * then `it->has_value` is true. The new value is in `it->current_value`.
  * For convenience, returns `it->has_value`.
  */
-bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it,
                                                 uint32_t val);
 
+/** DEPRECATED, use `roaring_uint32_iterator_move_equalorlarger`. */
+CROARING_DEPRECATED static inline bool
+roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+                                           uint32_t val) {
+    return roaring_uint32_iterator_move_equalorlarger(it, val);
+}
+
 /**
  * Creates a copy of an iterator.
  * Caller must free it.
  */
-roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+roaring_uint32_iterator_t *roaring_uint32_iterator_copy(
     const roaring_uint32_iterator_t *it);
 
+/** DEPRECATED, use `roaring_uint32_iterator_copy`. */
+CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
+roaring_copy_uint32_iterator(const roaring_uint32_iterator_t *it) {
+    return roaring_uint32_iterator_copy(it);
+}
+
 /**
- * Free memory following `roaring_create_iterator()`
+ * Free memory following `roaring_iterator_create()`
  */
-void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
+void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_free`. */
+CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
+    roaring_uint32_iterator_t *it) {
+    roaring_uint32_iterator_free(it);
+}
 
 /*
  * Reads next ${count} values from iterator into user-supplied ${buf}.
  * Returns the number of read elements.
- * This number can be smaller than ${count}, which means that iterator is drained.
+ * This number can be smaller than ${count}, which means that iterator is
+ * drained.
  *
  * This function satisfies semantics of iteration and can be used together with
  * other iterator functions.
  *  - first value is copied from ${it}->current_value
  *  - after function returns, iterator is positioned at the next element
  */
-uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
-                                      uint32_t* buf, uint32_t count);
+uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
+                                      uint32_t *buf, uint32_t count);
+
+/** DEPRECATED, use `roaring_uint32_iterator_read`. */
+CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
+    roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) {
+    return roaring_uint32_iterator_read(it, buf, count);
+}
 
 #ifdef __cplusplus
-} } }  // extern "C" { namespace roaring { namespace api {
+}
+}
+}  // extern "C" { namespace roaring { namespace api {
 #endif
 
-#endif  /* ROARING_H */
+#endif /* ROARING_H */
 
 #ifdef __cplusplus
-    /**
-     * Best practices for C++ headers is to avoid polluting global scope.
-     * But for C compatibility when just `roaring.h` is included building as
-     * C++, default to global access for the C public API.
-     *
-     * BUT when `roaring.hh` is included instead, it sets this flag.  That way
-     * explicit namespacing must be used to get the C functions.
-     *
-     * This is outside the include guard so that if you include BOTH headers,
-     * the order won't matter; you still get the global definitions.
-     */
-    #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
-        using namespace ::roaring::api;
-    #endif
+/**
+ * Best practices for C++ headers is to avoid polluting global scope.
+ * But for C compatibility when just `roaring.h` is included building as
+ * C++, default to global access for the C public API.
+ *
+ * BUT when `roaring.hh` is included instead, it sets this flag.  That way
+ * explicit namespacing must be used to get the C functions.
+ *
+ * This is outside the include guard so that if you include BOTH headers,
+ * the order won't matter; you still get the global definitions.
+ */
+#if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
+using namespace ::roaring::api;
+#endif
 #endif
 /* end file include/roaring/roaring.h */
 /* begin file include/roaring/memory.h */
@@ -1141,3 +2245,660 @@ void roaring_aligned_free(void*);
 
 #endif  // INCLUDE_ROARING_MEMORY_H_
 /* end file include/roaring/memory.h */
+/* begin file include/roaring/roaring64.h */
+#ifndef ROARING64_H
+#define ROARING64_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace api {
+#endif
+
+typedef struct roaring64_bitmap_s roaring64_bitmap_t;
+typedef struct roaring64_leaf_s roaring64_leaf_t;
+typedef struct roaring64_iterator_s roaring64_iterator_t;
+
+/**
+ * A bit of context usable with `roaring64_bitmap_*_bulk()` functions.
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap (unless re-initialized to
+ * zero), and any modification to a bitmap (other than modifications performed
+ * with `_bulk()` functions with the context passed) will invalidate any
+ * contexts associated with that bitmap.
+ */
+typedef struct roaring64_bulk_context_s {
+    uint8_t high_bytes[6];
+    roaring64_leaf_t *leaf;
+} roaring64_bulk_context_t;
+
+/**
+ * Dynamically allocates a new bitmap (initially empty).
+ * Client is responsible for calling `roaring64_bitmap_free()`.
+ */
+roaring64_bitmap_t *roaring64_bitmap_create(void);
+void roaring64_bitmap_free(roaring64_bitmap_t *r);
+
+/**
+ * Returns a copy of a bitmap.
+ */
+roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r);
+
+/**
+ * Creates a new bitmap of a pointer to N 64-bit integers.
+ */
+roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,
+                                            const uint64_t *vals);
+
+#ifdef __cplusplus
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring64_bitmap_of_ptr` function instead.
+ */
+// Use an immediately invoked closure, capturing by reference
+// (in case __VA_ARGS__ refers to context outside the closure)
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring64_bitmap_from(...)                                       \
+    [&]() {                                                              \
+        const uint64_t roaring64_bitmap_from_array[] = {0, __VA_ARGS__}; \
+        return roaring64_bitmap_of_ptr(                                  \
+            (sizeof(roaring64_bitmap_from_array) /                       \
+             sizeof(roaring64_bitmap_from_array[0])) -                   \
+                1,                                                       \
+            &roaring64_bitmap_from_array[1]);                            \
+    }()
+#else
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring64_bitmap_of_ptr` function instead.
+ */
+// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof
+// expression, which is an unevaluated context, so it's even safe in the case
+// where expressions passed have side effects (roaring64_bitmap_from(my_func(),
+// ++i))
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring64_bitmap_from(...)                                           \
+    roaring64_bitmap_of_ptr(                                                 \
+        (sizeof((const uint64_t[]){0, __VA_ARGS__}) / sizeof(uint64_t)) - 1, \
+        &((const uint64_t[]){0, __VA_ARGS__})[1])
+#endif
+
+/**
+ * Create a new bitmap containing all the values in [min, max) that are at a
+ * distance k*step from min.
+ */
+roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max,
+                                                uint64_t step);
+
+/**
+ * Adds the provided value to the bitmap.
+ */
+void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Adds the provided value to the bitmap.
+ * Returns true if a new value was added, false if the value already existed.
+ */
+bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Add an item, using context from a previous insert for faster insertion.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r,
+                               roaring64_bulk_context_t *context, uint64_t val);
+
+/**
+ * Add `n_args` values from `vals`, faster than repeatedly calling
+ * `roaring64_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same high 48 bits of the value as consecutive elements in
+ * `vals`.
+ */
+void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args,
+                               const uint64_t *vals);
+
+/**
+ * Add all values in range [min, max).
+ */
+void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min,
+                                uint64_t max);
+
+/**
+ * Add all values in range [min, max].
+ */
+void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min,
+                                       uint64_t max);
+
+/**
+ * Removes a value from the bitmap if present.
+ */
+void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Removes a value from the bitmap if present, returns true if the value was
+ * removed and false if the value was not present.
+ */
+bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Remove an item, using context from a previous insert for faster removal.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r,
+                                  roaring64_bulk_context_t *context,
+                                  uint64_t val);
+
+/**
+ * Remove `n_args` values from `vals`, faster than repeatedly calling
+ * `roaring64_bitmap_remove()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same high 48 bits of the value as consecutive elements in
+ * `vals`.
+ */
+void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args,
+                                  const uint64_t *vals);
+
+/**
+ * Remove all values in range [min, max).
+ */
+void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min,
+                                   uint64_t max);
+
+/**
+ * Remove all values in range [min, max].
+ */
+void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min,
+                                          uint64_t max);
+
+/**
+ * Returns true if the provided value is present.
+ */
+bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Returns true if all values in the range [min, max) are present.
+ */
+bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min,
+                                     uint64_t max);
+
+/**
+ * Check if an item is present using context from a previous insert or search
+ * for faster search.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r,
+                                    roaring64_bulk_context_t *context,
+                                    uint64_t val);
+
+/**
+ * Selects the element at index 'rank' where the smallest element is at index 0.
+ * If the size of the bitmap is strictly greater than rank, then this function
+ * returns true and sets element to the element of given rank. Otherwise, it
+ * returns false.
+ */
+bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank,
+                             uint64_t *element);
+
+/**
+ * Returns the number of integers that are smaller or equal to x. Thus if x is
+ * the first element, this function will return 1. If x is smaller than the
+ * smallest element, this function will return 0.
+ *
+ * The indexing convention differs between roaring64_bitmap_select and
+ * roaring64_bitmap_rank: roaring_bitmap64_select refers to the smallest value
+ * as having index 0, whereas roaring64_bitmap_rank returns 1 when ranking
+ * the smallest value.
+ */
+uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Returns true if the given value is in the bitmap, and sets `out_index` to the
+ * (0-based) index of the value in the bitmap. Returns false if the value is not
+ * in the bitmap.
+ */
+bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val,
+                                uint64_t *out_index);
+
+/**
+ * Returns the number of values in the bitmap.
+ */
+uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the number of elements in the range [min, max).
+ */
+uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r,
+                                            uint64_t min, uint64_t max);
+
+/**
+ * Returns true if the bitmap is empty (cardinality is zero).
+ */
+bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the smallest value in the set, or UINT64_MAX if the set is empty.
+ */
+uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the largest value in the set, or 0 if empty.
+ */
+uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r);
+
+/**
+ * Returns true if the result has at least one run container.
+ */
+bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r);
+
+/**
+ * Perform internal consistency checks.
+ *
+ * Returns true if the bitmap is consistent. It may be useful to call this
+ * after deserializing bitmaps from untrusted sources. If
+ * roaring64_bitmap_internal_validate returns true, then the bitmap is
+ * consistent and can be trusted not to cause crashes or memory corruption.
+ *
+ * If reason is non-null, it will be set to a string describing the first
+ * inconsistency found if any.
+ */
+bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r,
+                                        const char **reason);
+
+/**
+ * Return true if the two bitmaps contain the same elements.
+ */
+bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1,
+                             const roaring64_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2.
+ */
+bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1,
+                                const roaring64_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2, and r2 is strictly
+ * greater than r1.
+ */
+bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1,
+                                       const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the intersection between two bitmaps and returns new bitmap. The
+ * caller is responsible for free-ing the result.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may
+ * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary
+ * bitmaps.
+ */
+roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1,
+                                         const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the intersection between two bitmaps.
+ */
+uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1,
+                                          const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_and()`, modifies `r1`. `r1` and `r2`
+ * are allowed to be equal.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmaps.
+ */
+void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1,
+                                  const roaring64_bitmap_t *r2);
+
+/**
+ * Check whether two bitmaps intersect.
+ */
+bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1,
+                                const roaring64_bitmap_t *r2);
+
+/**
+ * Check whether a bitmap intersects the range [min, max).
+ */
+bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r,
+                                           uint64_t min, uint64_t max);
+
+/**
+ * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
+ * distance, or the Jaccard similarity coefficient)
+ *
+ * The Jaccard index is undefined if both bitmaps are empty.
+ */
+double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1,
+                                      const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1,
+                                        const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the union between two bitmaps.
+ */
+uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1,
+                                         const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_or(), modifies `r1`.
+ */
+void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1,
+                                 const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the symmetric difference (xor) between two bitmaps and returns a new
+ * bitmap. The caller is responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1,
+                                         const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the symmetric difference (xor) between two bitmaps.
+ */
+uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1,
+                                          const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_xor()`, modifies `r1`. `r1` and `r2`
+ * are not allowed to be equal (that would result in an empty bitmap).
+ */
+void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1,
+                                  const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the difference (andnot) between two bitmaps and returns a new
+ * bitmap. The caller is responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1,
+                                            const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the difference (andnot) between two bitmaps.
+ */
+uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1,
+                                             const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_andnot()`, modifies `r1`. `r1` and `r2`
+ * are not allowed to be equal (that would result in an empty bitmap).
+ */
+void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1,
+                                     const roaring64_bitmap_t *r2);
+
+/**
+ * Compute the negation of the bitmap in the interval [min, max).
+ * The number of negated values is `max - min`. Areas outside the range are
+ * passed through unchanged.
+ */
+roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r,
+                                          uint64_t min, uint64_t max);
+
+/**
+ * Compute the negation of the bitmap in the interval [min, max].
+ * The number of negated values is `max - min + 1`. Areas outside the range are
+ * passed through unchanged.
+ */
+roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r,
+                                                 uint64_t min, uint64_t max);
+
+/**
+ * In-place version of `roaring64_bitmap_flip`. Compute the negation of the
+ * bitmap in the interval [min, max). The number of negated values is `max -
+ * min`. Areas outside the range are passed through unchanged.
+ */
+void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min,
+                                   uint64_t max);
+/**
+ * In-place version of `roaring64_bitmap_flip_closed`. Compute the negation of
+ * the bitmap in the interval [min, max]. The number of negated values is `max -
+ * min + 1`. Areas outside the range are passed through unchanged.
+ */
+void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
+                                          uint64_t max);
+/**
+ * How many bytes are required to serialize this bitmap.
+ *
+ * This is meant to be compatible with other languages:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ */
+size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
+
+/**
+ * Write a bitmap to a buffer. The output buffer should refer to at least
+ * `roaring64_bitmap_portable_size_in_bytes(r)` bytes of allocated memory.
+ *
+ * Returns how many bytes were written, which should match
+ * `roaring64_bitmap_portable_size_in_bytes(r)`.
+ *
+ * This is meant to be compatible with other languages:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ */
+size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
+                                           char *buf);
+/**
+ * Check how many bytes would be read (up to maxbytes) at this pointer if there
+ * is a valid bitmap, returns zero if there is no valid bitmap.
+ *
+ * This is meant to be compatible with other languages
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ */
+size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
+                                                  size_t maxbytes);
+
+/**
+ * Read a bitmap from a serialized buffer safely (reading up to maxbytes).
+ * In case of failure, NULL is returned.
+ *
+ * This is meant to be compatible with other languages
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ *
+ * The function itself is safe in the sense that it will not cause buffer
+ * overflows. However, for correct operations, it is assumed that the bitmap
+ * read was once serialized from a valid bitmap (i.e., it follows the format
+ * specification). If you provided an incorrect input (garbage), then the bitmap
+ * read may not be in a valid state and following operations may not lead to
+ * sensible results. In particular, the serialized array containers need to be
+ * in sorted order, and the run containers should be in sorted non-overlapping
+ * order. This is is guaranteed to happen when serializing an existing bitmap,
+ * but not for random inputs.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ */
+roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf,
+                                                               size_t maxbytes);
+
+/**
+ * Iterate over the bitmap elements. The function `iterator` is called once for
+ * all the values with `ptr` (can be NULL) as the second parameter of each call.
+ *
+ * `roaring_iterator64` is simply a pointer to a function that returns a bool
+ * and takes `(uint64_t, void*)` as inputs. True means that the iteration should
+ * continue, while false means that it should stop.
+ *
+ * Returns true if the `roaring64_iterator` returned true throughout (so that
+ * all data points were necessarily visited).
+ *
+ * Iteration is ordered from the smallest to the largest elements.
+ */
+bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
+                              roaring_iterator64 iterator, void *ptr);
+
+/**
+ * Convert the bitmap to a sorted array `out`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ * ```
+ * out = malloc(roaring64_bitmap_get_cardinality(bitmap) * sizeof(uint64_t));
+ * ```
+ */
+void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
+                                      uint64_t *out);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring64_iterator_free()`.
+ *
+ * The iterator is initialized. If there is a value, then this iterator points
+ * to the first value and `roaring64_iterator_has_value()` returns true. The
+ * value can be retrieved with `roaring64_iterator_value()`.
+ */
+roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring64_iterator_free()`.
+ *
+ * The iterator is initialized. If there is a value, then this iterator points
+ * to the last value and `roaring64_iterator_has_value()` returns true. The
+ * value can be retrieved with `roaring64_iterator_value()`.
+ */
+roaring64_iterator_t *roaring64_iterator_create_last(
+    const roaring64_bitmap_t *r);
+
+/**
+ * Re-initializes an existing iterator. Functionally the same as
+ * `roaring64_iterator_create` without a allocation.
+ */
+void roaring64_iterator_reinit(const roaring64_bitmap_t *r,
+                               roaring64_iterator_t *it);
+
+/**
+ * Re-initializes an existing iterator. Functionally the same as
+ * `roaring64_iterator_create_last` without a allocation.
+ */
+void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r,
+                                    roaring64_iterator_t *it);
+
+/**
+ * Creates a copy of the iterator. Caller is responsible for calling
+ * `roaring64_iterator_free()` on the resulting iterator.
+ */
+roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it);
+
+/**
+ * Free the iterator.
+ */
+void roaring64_iterator_free(roaring64_iterator_t *it);
+
+/**
+ * Returns true if the iterator currently points to a value. If so, calling
+ * `roaring64_iterator_value()` returns the value.
+ */
+bool roaring64_iterator_has_value(const roaring64_iterator_t *it);
+
+/**
+ * Returns the value the iterator currently points to. Should only be called if
+ * `roaring64_iterator_has_value()` returns true.
+ */
+uint64_t roaring64_iterator_value(const roaring64_iterator_t *it);
+
+/**
+ * Advance the iterator. If there is a new value, then
+ * `roaring64_iterator_has_value()` returns true. Values are traversed in
+ * increasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ *
+ * Once this returns false, `roaring64_iterator_advance` should not be called on
+ * the iterator again. Calling `roaring64_iterator_previous` is allowed.
+ */
+bool roaring64_iterator_advance(roaring64_iterator_t *it);
+
+/**
+ * Decrement the iterator. If there is a new value, then
+ * `roaring64_iterator_has_value()` returns true. Values are traversed in
+ * decreasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ *
+ * Once this returns false, `roaring64_iterator_previous` should not be called
+ * on the iterator again. Calling `roaring64_iterator_advance` is allowed.
+ */
+bool roaring64_iterator_previous(roaring64_iterator_t *it);
+
+/**
+ * Move the iterator to the first value greater than or equal to `val`, if it
+ * exists at or after the current position of the iterator. If there is a new
+ * value, then `roaring64_iterator_has_value()` returns true. Values are
+ * traversed in increasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ */
+bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it,
+                                           uint64_t val);
+
+/**
+ * Reads up to `count` values from the iterator into the given `buf`. Returns
+ * the number of elements read. The number of elements read can be smaller than
+ * `count`, which means that there are no more elements in the bitmap.
+ *
+ * This function can be used together with other iterator functions.
+ */
+uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,
+                                 uint64_t count);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace roaring
+}  // namespace api
+#endif
+
+#endif /* ROARING64_H */
+/* end file include/roaring/roaring64.h */
+#endif
diff --git a/src/lib/third_party/include/roaring_v2.h b/src/lib/third_party/include/roaring_v2.h
new file mode 100644
index 000000000..500ba9cb9
--- /dev/null
+++ b/src/lib/third_party/include/roaring_v2.h
@@ -0,0 +1,1143 @@
+// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
+// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+
+/*
+ * The CRoaring project is under a dual license (Apache/MIT).
+ * Users of the library may choose one or the other license.
+ */
+/*
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/*
+ * MIT License
+ *
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Permission is hereby granted, free of charge, to any
+ * person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software
+ * is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+ * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+ * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+/* begin file include/roaring/roaring_version.h */
+// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
+#ifndef ROARING_INCLUDE_ROARING_VERSION 
+#define ROARING_INCLUDE_ROARING_VERSION 
+#define ROARING_VERSION "0.9.6"
+enum { 
+    ROARING_VERSION_MAJOR = 0,
+    ROARING_VERSION_MINOR = 9,
+    ROARING_VERSION_REVISION = 6
+}; 
+#endif // ROARING_INCLUDE_ROARING_VERSION 
+/* end file include/roaring/roaring_version.h */
+/* begin file include/roaring/roaring_types.h */
+/*
+  Typedefs used by various components
+*/
+
+#ifndef ROARING_TYPES_H
+#define ROARING_TYPES_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace api {
+#endif
+
+
+/**
+ * When building .c files as C++, there's added compile-time checking if the
+ * container types are derived from a `container_t` base class.  So long as
+ * such a base class is empty, the struct will behave compatibly with C structs
+ * despite the derivation.  This is due to the Empty Base Class Optimization:
+ *
+ * https://en.cppreference.com/w/cpp/language/ebo
+ *
+ * But since C isn't namespaced, taking `container_t` globally might collide
+ * with other projects.  So roaring.h uses ROARING_CONTAINER_T, while internal
+ * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
+ */
+#if defined(__cplusplus)
+    extern "C++" {
+      struct container_s {};
+    }
+    #define ROARING_CONTAINER_T ::roaring::api::container_s
+#else
+    #define ROARING_CONTAINER_T void  // no compile-time checking
+#endif
+
+#define ROARING_FLAG_COW UINT8_C(0x1)
+#define ROARING_FLAG_FROZEN UINT8_C(0x2)
+
+/**
+ * Roaring arrays are array-based key-value pairs having containers as values
+ * and 16-bit integer keys. A roaring bitmap  might be implemented as such.
+ */
+
+// parallel arrays.  Element sizes quite different.
+// Alternative is array
+// of structs.  Which would have better
+// cache performance through binary searches?
+
+typedef struct roaring_array_s {
+    int32_t size;
+    int32_t allocation_size;
+    ROARING_CONTAINER_T **containers;  // Use container_t in non-API files!
+    uint16_t *keys;
+    uint8_t *typecodes;
+    uint8_t flags;
+} roaring_array_t;
+
+
+typedef bool (*roaring_iterator)(uint32_t value, void *param);
+typedef bool (*roaring_iterator64)(uint64_t value, void *param);
+
+/**
+*  (For advanced users.)
+* The roaring_statistics_t can be used to collect detailed statistics about
+* the composition of a roaring bitmap.
+*/
+typedef struct roaring_statistics_s {
+    uint32_t n_containers; /* number of containers */
+
+    uint32_t n_array_containers;  /* number of array containers */
+    uint32_t n_run_containers;    /* number of run containers */
+    uint32_t n_bitset_containers; /* number of bitmap containers */
+
+    uint32_t
+        n_values_array_containers;    /* number of values in array containers */
+    uint32_t n_values_run_containers; /* number of values in run containers */
+    uint32_t
+        n_values_bitset_containers; /* number of values in  bitmap containers */
+
+    uint32_t n_bytes_array_containers;  /* number of allocated bytes in array
+                                           containers */
+    uint32_t n_bytes_run_containers;    /* number of allocated bytes in run
+                                           containers */
+    uint32_t n_bytes_bitset_containers; /* number of allocated bytes in  bitmap
+                                           containers */
+
+    uint32_t
+        max_value; /* the maximal value, undefined if cardinality is zero */
+    uint32_t
+        min_value; /* the minimal value, undefined if cardinality is zero */
+    uint64_t sum_value; /* the sum of all values (could be used to compute
+                           average) */
+
+    uint64_t cardinality; /* total number of values stored in the bitmap */
+
+    // and n_values_arrays, n_values_rle, n_values_bitmap
+} roaring_statistics_t;
+
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif /* ROARING_TYPES_H */
+/* end file include/roaring/roaring_types.h */
+/* begin file include/roaring/roaring.h */
+/*
+ * An implementation of Roaring Bitmaps in C.
+ */
+
+#ifndef ROARING_H
+#define ROARING_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>  // for `size_t`
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace api {
+#endif
+
+typedef struct roaring_bitmap_s {
+    roaring_array_t high_low_container;
+} roaring_bitmap_t;
+
+/**
+ * Dynamically allocates a new bitmap (initially empty).
+ * Returns NULL if the allocation fails.
+ * Capacity is a performance hint for how many "containers" the data will need.
+ * Client is responsible for calling `roaring_bitmap_free()`.
+ */
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
+
+/**
+ * Dynamically allocates a new bitmap (initially empty).
+ * Returns NULL if the allocation fails.
+ * Client is responsible for calling `roaring_bitmap_free()`.
+ */
+static inline roaring_bitmap_t *roaring_bitmap_create(void)
+  { return roaring_bitmap_create_with_capacity(0); }
+
+/**
+ * Initialize a roaring bitmap structure in memory controlled by client.
+ * Capacity is a performance hint for how many "containers" the data will need.
+ * Can return false if auxiliary allocations fail when capacity greater than 0.
+ */
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
+
+/**
+ * Initialize a roaring bitmap structure in memory controlled by client.
+ * The bitmap will be in a "clear" state, with no auxiliary allocations.
+ * Since this performs no allocations, the function will not fail.
+ */
+static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
+  { roaring_bitmap_init_with_capacity(r, 0); }
+
+/**
+ * Add all the values between min (included) and max (excluded) that are at a
+ * distance k*step from min.
+*/
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+                                            uint32_t step);
+
+/**
+ * Creates a new bitmap from a pointer of uint32_t integers
+ */
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
+
+/*
+ * Whether you want to use copy-on-write.
+ * Saves memory and avoids copies, but needs more care in a threaded context.
+ * Most users should ignore this flag.
+ *
+ * Note: If you do turn this flag to 'true', enabling COW, then ensure that you
+ * do so for all of your bitmaps, since interactions between bitmaps with and
+ * without COW is unsafe.
+ */
+static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
+    return r->high_low_container.flags & ROARING_FLAG_COW;
+}
+static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
+                                                    bool cow) {
+    if (cow) {
+        r->high_low_container.flags |= ROARING_FLAG_COW;
+    } else {
+        r->high_low_container.flags &= ~ROARING_FLAG_COW;
+    }
+}
+
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+                                            int64_t offset);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Describe the inner structure of the bitmap.
+ */
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
+#endif
+
+/**
+ * Creates a new bitmap from a list of uint32_t integers
+ */
+roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+
+/**
+ * Copies a bitmap (this does memory allocation).
+ * The caller is responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
+
+/**
+ * Copies a bitmap from src to dest. It is assumed that the pointer dest
+ * is to an already allocated bitmap. The content of the dest bitmap is
+ * freed/deleted.
+ *
+ * It might be preferable and simpler to call roaring_bitmap_copy except
+ * that roaring_bitmap_overwrite can save on memory allocations.
+ */
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+                              const roaring_bitmap_t *src);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Print the content of the bitmap.
+ */
+void roaring_bitmap_printf(const roaring_bitmap_t *r);
+#endif
+
+/**
+ * Computes the intersection between two bitmaps and returns new bitmap. The
+ * caller is responsible for memory management.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ * You may also rely on roaring_bitmap_and_inplace to avoid creating
+ * many temporary bitmaps.
+ */
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
+                                     const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the intersection between two bitmaps.
+ */
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1,
+                                        const roaring_bitmap_t *r2);
+
+/**
+ * Check whether two bitmaps intersect.
+ */
+bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
+                              const roaring_bitmap_t *r2);
+
+/**
+ * Check whether a bitmap and a closed range intersect.
+ */
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+                                         uint64_t x, uint64_t y);
+
+/**
+ * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
+ * distance, or the Jaccard similarity coefficient)
+ *
+ * The Jaccard index is undefined if both bitmaps are empty.
+ */
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1,
+                                    const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the union between two bitmaps.
+ */
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1,
+                                       const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the difference (andnot) between two bitmaps.
+ */
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1,
+                                           const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the symmetric difference (xor) between two bitmaps.
+ */
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
+                                        const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of `roaring_bitmap_and()`, modifies r1
+ * r1 == r2 is allowed.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ */
+void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
+                                const roaring_bitmap_t *r2);
+
+/**
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1,
+                                    const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of `roaring_bitmap_or(), modifies r1.
+ * TODO: decide whether r1 == r2 ok
+ */
+void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
+                               const roaring_bitmap_t *r2);
+
+/**
+ * Compute the union of 'number' bitmaps.
+ * Caller is responsible for freeing the result.
+ * See also `roaring_bitmap_or_many_heap()`
+ */
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+                                         const roaring_bitmap_t **rs);
+
+/**
+ * Compute the union of 'number' bitmaps using a heap. This can sometimes be
+ * faster than `roaring_bitmap_or_many() which uses a naive algorithm.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+                                              const roaring_bitmap_t **rs);
+
+/**
+ * Computes the symmetric difference (xor) between two bitmaps
+ * and returns new bitmap. The caller is responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1,
+                                     const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2.
+ */
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1,
+                                const roaring_bitmap_t *r2);
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+                                          const roaring_bitmap_t **rs);
+
+/**
+ * Computes the difference (andnot) between two bitmaps and returns new bitmap.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1,
+                                        const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2.
+ */
+void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
+                                   const roaring_bitmap_t *r2);
+
+/**
+ * TODO: consider implementing:
+ *
+ * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be
+ *  faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is
+ *  responsible for freeing the result.""
+ *
+ * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number,
+ *                                                const roaring_bitmap_t **rs);
+ */
+
+/**
+ * Frees the memory.
+ */
+void roaring_bitmap_free(const roaring_bitmap_t *r);
+
+/**
+ * A bit of context usable with `roaring_bitmap_*_bulk()` functions
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap
+ * (unless re-initialized to zero), and any modification to a bitmap
+ * (other than modifications performed with `_bulk()` functions with the context
+ * passed) will invalidate any contexts associated with that bitmap.
+ */
+typedef struct roaring_bulk_context_s {
+    ROARING_CONTAINER_T *container;
+    int idx;
+    uint16_t key;
+    uint8_t typecode;
+} roaring_bulk_context_t;
+
+/**
+ * Add an item, using context from a previous insert for speed optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+                             roaring_bulk_context_t *context, uint32_t val);
+
+/**
+ * Add value n_args from pointer vals, faster than repeatedly calling
+ * `roaring_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same "key" (high 16 bits of the value) as consecutive
+ * elements in `vals`
+ */
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+                             const uint32_t *vals);
+
+/**
+ * Add value x
+ */
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Add value x
+ * Returns true if a new value was added, false if the value already existed.
+ */
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Add all values in range [min, max]
+ */
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
+                                     uint32_t min, uint32_t max);
+
+/**
+ * Add all values in range [min, max)
+ */
+static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
+                                            uint64_t min, uint64_t max) {
+    if(max == min) return;
+    roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
+}
+
+/**
+ * Remove value x
+ */
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Remove all values in range [min, max]
+ */
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
+                                        uint32_t min, uint32_t max);
+
+/**
+ * Remove all values in range [min, max)
+ */
+static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
+                                               uint64_t min, uint64_t max) {
+    if(max == min) return;
+    roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
+}
+
+/**
+ * Remove multiple values
+ */
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+                                const uint32_t *vals);
+
+/**
+ * Remove value x
+ * Returns true if a new value was removed, false if the value was not existing.
+ */
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Check if value is present
+ */
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
+
+/**
+ * Check whether a range of values from range_start (included)
+ * to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
+                                   uint64_t range_start,
+                                   uint64_t range_end);
+
+/**
+ * Check if an items is present, using context from a previous insert for speed
+ * optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+                                  roaring_bulk_context_t *context,
+                                  uint32_t val);
+
+/**
+ * Get the cardinality of the bitmap (number of elements).
+ */
+uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r);
+
+/**
+ * Returns the number of elements in the range [range_start, range_end).
+ */
+uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+                                          uint64_t range_start,
+                                          uint64_t range_end);
+
+/**
+* Returns true if the bitmap is empty (cardinality is zero).
+*/
+bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
+
+
+/**
+ * Empties the bitmap.  It will have no auxiliary allocations (so if the bitmap
+ * was initialized in client memory via roaring_bitmap_init(), then a call to
+ * roaring_bitmap_clear() would be enough to "free" it)
+ */
+void roaring_bitmap_clear(roaring_bitmap_t *r);
+
+/**
+ * Convert the bitmap to a sorted array, output in `ans`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ *     ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t));
+ */
+void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
+
+
+/**
+ * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ *     ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t));
+ *
+ * Return false in case of failure (e.g., insufficient memory)
+ */
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+                                       size_t offset, size_t limit,
+                                       uint32_t *ans);
+
+/**
+ * Remove run-length encoding even when it is more space efficient.
+ * Return whether a change was applied.
+ */
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
+
+/**
+ * Convert array and bitmap containers to run containers when it is more
+ * efficient; also convert from run containers when more space efficient.
+ *
+ * Returns true if the result has at least one run container.
+ * Additional savings might be possible by calling `shrinkToFit()`.
+ */
+bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
+
+/**
+ * If needed, reallocate memory to shrink the memory usage.
+ * Returns the number of bytes saved.
+ */
+size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
+
+/**
+ * Write the bitmap to an output pointer, this output buffer should refer to
+ * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes.
+ *
+ * See `roaring_bitmap_portable_serialize()` if you want a format that's
+ * compatible with Java and Go implementations.  This format can sometimes be
+ * more space efficient than the portable form, e.g. when the data is sparse.
+ *
+ * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
+
+/**
+ * Use with `roaring_bitmap_serialize()`.
+ *
+ * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
+
+/**
+ * How many bytes are required to serialize this bitmap (NOT compatible
+ * with Java and Go versions)
+ */
+size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * This function is unsafe in the sense that if there is no valid serialized
+ * bitmap at the pointer, then many bytes could be read, possibly causing a
+ * buffer overflow.  See also roaring_bitmap_portable_deserialize_safe().
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+*
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
+
+/**
+ * Read bitmap from a serialized buffer safely (reading up to maxbytes).
+ * In case of failure, NULL is returned.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
+                                                           size_t maxbytes);
+
+/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * The function is unsafe in the following ways:
+ * 1) It may execute unaligned memory accesses.
+ * 2) A buffer overflow may occur if buf does not point to a valid serialized
+ *    bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
+
+/**
+ * Check how many bytes would be read (up to maxbytes) at this pointer if there
+ * is a bitmap, returns zero if there is no valid bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ */
+size_t roaring_bitmap_portable_deserialize_size(const char *buf,
+                                                size_t maxbytes);
+
+/**
+ * How many bytes are required to serialize this bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ */
+size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Write a bitmap to a char buffer.  The output buffer should refer to at least
+ * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory.
+ *
+ * Returns how many bytes were written which should match
+ * `roaring_bitmap_portable_size_in_bytes(r)`.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
+
+/*
+ * "Frozen" serialization format imitates memory layout of roaring_bitmap_t.
+ * Deserialized bitmap is a constant view of the underlying buffer.
+ * This significantly reduces amount of allocations and copying required during
+ * deserialization.
+ * It can be used with memory mapped files.
+ * Example can be found in benchmarks/frozen_benchmark.c
+ *
+ *         [#####] const roaring_bitmap_t *
+ *          | | |
+ *     +----+ | +-+
+ *     |      |   |
+ * [#####################################] underlying buffer
+ *
+ * Note that because frozen serialization format imitates C memory layout
+ * of roaring_bitmap_t, it is not fixed. It is different on big/little endian
+ * platforms and can be changed in future.
+ */
+
+/**
+ * Returns number of bytes required to serialize bitmap using frozen format.
+ */
+size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Serializes bitmap using frozen format.
+ * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
+
+/**
+ * Creates constant bitmap that is a view of a given buffer.
+ * Buffer data should have been written by `roaring_bitmap_frozen_serialize()`
+ * Its beginning must also be aligned by 32 bytes.
+ * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
+                                                   size_t length);
+
+/**
+ * Iterate over the bitmap elements. The function iterator is called once for
+ * all the values with ptr (can be NULL) as the second parameter of each call.
+ *
+ * `roaring_iterator` is simply a pointer to a function that returns bool
+ * (true means that the iteration should continue while false means that it
+ * should stop), and takes (uint32_t,void*) as inputs.
+ *
+ * Returns true if the roaring_iterator returned true throughout (so that all
+ * data points were necessarily visited).
+ *
+ * Iteration is ordered: from the smallest to the largest elements.
+ */
+bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+                     void *ptr);
+
+bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+                       uint64_t high_bits, void *ptr);
+
+/**
+ * Return true if the two bitmaps contain the same elements.
+ */
+bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+                           const roaring_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2.
+ */
+bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+                              const roaring_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2, and r2 is strictly
+ * greater than r1.
+ */
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+                                     const roaring_bitmap_t *r2);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for memory management.
+ *
+ * The lazy version defers some computations such as the maintenance of the
+ * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
+ * after executing "lazy" computations.
+ *
+ * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result.
+ *
+ * `bitsetconversion` is a flag which determines whether container-container
+ * operations force a bitset conversion.
+ */
+roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
+                                         const roaring_bitmap_t *r2,
+                                         const bool bitsetconversion);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Inplace version of roaring_bitmap_lazy_or, modifies r1.
+ *
+ * `bitsetconversion` is a flag which determines whether container-container
+ * operations force a bitset conversion.
+ */
+void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
+                                    const roaring_bitmap_t *r2,
+                                    const bool bitsetconversion);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()`
+ * or modified with `roaring_bitmap_lazy_or_inplace()`.
+ */
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
+
+/**
+ * Computes the symmetric difference between two bitmaps and returns new bitmap.
+ * The caller is responsible for memory management.
+ *
+ * The lazy version defers some computations such as the maintenance of the
+ * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
+ * after executing "lazy" computations.
+ *
+ * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on
+ * the result.
+ */
+roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
+                                          const roaring_bitmap_t *r2);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2
+ */
+void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
+                                     const roaring_bitmap_t *r2);
+
+/**
+ * Compute the negation of the bitmap in the interval [range_start, range_end).
+ * The number of negated values is range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
+                                      uint64_t range_start, uint64_t range_end);
+
+/**
+ * compute (in place) the negation of the roaring bitmap within a specified
+ * interval: [range_start, range_end). The number of negated values is
+ * range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start,
+                                 uint64_t range_end);
+
+/**
+ * Selects the element at index 'rank' where the smallest element is at index 0.
+ * If the size of the roaring bitmap is strictly greater than rank, then this
+ * function returns true and sets element to the element of given rank.
+ * Otherwise, it returns false.
+ */
+bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
+                           uint32_t *element);
+
+/**
+ * roaring_bitmap_rank returns the number of integers that are smaller or equal
+ * to x. Thus if x is the first element, this function will return 1. If
+ * x is smaller than the smallest element, this function will return 0.
+ *
+ * The indexing convention differs between roaring_bitmap_select and
+ * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value
+ * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
+ * the smallest value.
+ */
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Returns the smallest value in the set, or UINT32_MAX if the set is empty.
+ */
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
+
+/**
+ * Returns the greatest value in the set, or 0 if the set is empty.
+ */
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
+
+/**
+ * (For advanced users.)
+ *
+ * Collect statistics about the bitmap, see roaring_types.h for
+ * a description of roaring_statistics_t
+ */
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+                               roaring_statistics_t *stat);
+
+/*********************
+* What follows is code use to iterate through values in a roaring bitmap
+
+roaring_bitmap_t *r =...
+roaring_uint32_iterator_t i;
+roaring_create_iterator(r, &i);
+while(i.has_value) {
+  printf("value = %d\n", i.current_value);
+  roaring_advance_uint32_iterator(&i);
+}
+
+Obviously, if you modify the underlying bitmap, the iterator
+becomes invalid. So don't.
+*/
+
+typedef struct roaring_uint32_iterator_s {
+    const roaring_bitmap_t *parent;  // owner
+    int32_t container_index;         // point to the current container index
+    int32_t in_container_index;  // for bitset and array container, this is out
+                                 // index
+    int32_t run_index;           // for run container, this points  at the run
+
+    uint32_t current_value;
+    bool has_value;
+
+    const ROARING_CONTAINER_T
+        *container;  // should be:
+                     // parent->high_low_container.containers[container_index];
+    uint8_t typecode;  // should be:
+                       // parent->high_low_container.typecodes[container_index];
+    uint32_t highbits;  // should be:
+                        // parent->high_low_container.keys[container_index]) <<
+                        // 16;
+
+} roaring_uint32_iterator_t;
+
+/**
+ * Initialize an iterator object that can be used to iterate through the
+ * values. If there is a  value, then this iterator points to the first value
+ * and `it->has_value` is true. The value is in `it->current_value`.
+ */
+void roaring_init_iterator(const roaring_bitmap_t *r,
+                           roaring_uint32_iterator_t *newit);
+
+/**
+ * Initialize an iterator object that can be used to iterate through the
+ * values. If there is a value, then this iterator points to the last value
+ * and `it->has_value` is true. The value is in `it->current_value`.
+ */
+void roaring_init_iterator_last(const roaring_bitmap_t *r,
+                                roaring_uint32_iterator_t *newit);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring_free_iterator()`.
+ *
+ * The iterator is initialized (this function calls `roaring_init_iterator()`)
+ * If there is a value, then this iterator points to the first value and
+ * `it->has_value` is true.  The value is in `it->current_value`.
+ */
+roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
+
+/**
+* Advance the iterator. If there is a new value, then `it->has_value` is true.
+* The new value is in `it->current_value`. Values are traversed in increasing
+* orders. For convenience, returns `it->has_value`.
+*/
+bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/**
+* Decrement the iterator. If there's a new value, then `it->has_value` is true.
+* The new value is in `it->current_value`. Values are traversed in decreasing
+* order. For convenience, returns `it->has_value`.
+*/
+bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/**
+ * Move the iterator to the first value >= `val`. If there is a such a value,
+ * then `it->has_value` is true. The new value is in `it->current_value`.
+ * For convenience, returns `it->has_value`.
+ */
+bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+                                                uint32_t val);
+
+/**
+ * Creates a copy of an iterator.
+ * Caller must free it.
+ */
+roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+    const roaring_uint32_iterator_t *it);
+
+/**
+ * Free memory following `roaring_create_iterator()`
+ */
+void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/*
+ * Reads next ${count} values from iterator into user-supplied ${buf}.
+ * Returns the number of read elements.
+ * This number can be smaller than ${count}, which means that iterator is drained.
+ *
+ * This function satisfies semantics of iteration and can be used together with
+ * other iterator functions.
+ *  - first value is copied from ${it}->current_value
+ *  - after function returns, iterator is positioned at the next element
+ */
+uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
+                                      uint32_t* buf, uint32_t count);
+
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif  /* ROARING_H */
+
+#ifdef __cplusplus
+    /**
+     * Best practices for C++ headers is to avoid polluting global scope.
+     * But for C compatibility when just `roaring.h` is included building as
+     * C++, default to global access for the C public API.
+     *
+     * BUT when `roaring.hh` is included instead, it sets this flag.  That way
+     * explicit namespacing must be used to get the C functions.
+     *
+     * This is outside the include guard so that if you include BOTH headers,
+     * the order won't matter; you still get the global definitions.
+     */
+    #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
+        using namespace ::roaring::api;
+    #endif
+#endif
+/* end file include/roaring/roaring.h */
+/* begin file include/roaring/memory.h */
+#ifndef INCLUDE_ROARING_MEMORY_H_
+#define INCLUDE_ROARING_MEMORY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>  // for size_t
+
+typedef void* (*roaring_malloc_p)(size_t);
+typedef void* (*roaring_realloc_p)(void*, size_t);
+typedef void* (*roaring_calloc_p)(size_t, size_t);
+typedef void (*roaring_free_p)(void*);
+typedef void* (*roaring_aligned_malloc_p)(size_t, size_t);
+typedef void (*roaring_aligned_free_p)(void*);
+
+typedef struct roaring_memory_s {
+    roaring_malloc_p malloc;
+    roaring_realloc_p realloc;
+    roaring_calloc_p calloc;
+    roaring_free_p free;
+    roaring_aligned_malloc_p aligned_malloc;
+    roaring_aligned_free_p aligned_free;
+} roaring_memory_t;
+
+void roaring_init_memory_hook(roaring_memory_t memory_hook);
+
+void* roaring_malloc(size_t);
+void* roaring_realloc(void*, size_t);
+void* roaring_calloc(size_t, size_t);
+void roaring_free(void*);
+void* roaring_aligned_malloc(size_t, size_t);
+void roaring_aligned_free(void*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INCLUDE_ROARING_MEMORY_H_
+/* end file include/roaring/memory.h */