diff options
Diffstat (limited to 'src/lib/third_party/include')
-rw-r--r-- | src/lib/third_party/include/roaring.h | 2031 | ||||
-rw-r--r-- | src/lib/third_party/include/roaring_v2.h | 1143 |
2 files changed, 3039 insertions, 135 deletions
diff --git a/src/lib/third_party/include/roaring.h b/src/lib/third_party/include/roaring.h index 500ba9cb9..4e356ef9a 100644 --- a/src/lib/third_party/include/roaring.h +++ b/src/lib/third_party/include/roaring.h @@ -1,5 +1,8 @@ +#ifdef USE_ROARING_V2 +#include "roaring_v2.h" +#else // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2023-02-12T11:34:02Z +// Created by amalgamation.sh on 2024-03-20T03:56:45Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -58,11 +61,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.9.6" +#define ROARING_VERSION "3.0.0" enum { - ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 9, - ROARING_VERSION_REVISION = 6 + ROARING_VERSION_MAJOR = 3, + ROARING_VERSION_MINOR = 0, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION /* end file include/roaring/roaring_version.h */ @@ -78,10 +81,11 @@ enum { #include <stdint.h> #ifdef __cplusplus -extern "C" { namespace roaring { namespace api { +extern "C" { +namespace roaring { +namespace api { #endif - /** * When building .c files as C++, there's added compile-time checking if the * container types are derived from a `container_t` base class. So long as @@ -95,12 +99,12 @@ extern "C" { namespace roaring { namespace api { * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` */ #if defined(__cplusplus) - extern "C++" { - struct container_s {}; - } - #define ROARING_CONTAINER_T ::roaring::api::container_s +extern "C++" { +struct container_s {}; +} +#define ROARING_CONTAINER_T ::roaring::api::container_s #else - #define ROARING_CONTAINER_T void // no compile-time checking +#define ROARING_CONTAINER_T void // no compile-time checking #endif #define ROARING_FLAG_COW UINT8_C(0x1) @@ -125,15 +129,14 @@ typedef struct roaring_array_s { uint8_t flags; } roaring_array_t; - typedef bool (*roaring_iterator)(uint32_t value, void *param); typedef bool (*roaring_iterator64)(uint64_t value, void *param); /** -* (For advanced users.) -* The roaring_statistics_t can be used to collect detailed statistics about -* the composition of a roaring bitmap. -*/ + * (For advanced users.) + * The roaring_statistics_t can be used to collect detailed statistics about + * the composition of a roaring bitmap. + */ typedef struct roaring_statistics_s { uint32_t n_containers; /* number of containers */ @@ -166,12 +169,912 @@ typedef struct roaring_statistics_s { // and n_values_arrays, n_values_rle, n_values_bitmap } roaring_statistics_t; +/** + * Roaring-internal type used to iterate within a roaring container. + */ +typedef struct roaring_container_iterator_s { + // For bitset and array containers this is the index of the bit / entry. + // For run containers this points at the run. + int32_t index; +} roaring_container_iterator_t; + #ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace api { +} +} +} // extern "C" { namespace roaring { namespace api { #endif #endif /* ROARING_TYPES_H */ /* end file include/roaring/roaring_types.h */ +/* begin file include/roaring/portability.h */ +/* + * portability.h + * + */ + +/** + * All macros should be prefixed with either CROARING or ROARING. + * The library uses both ROARING_... + * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for + * macros that are provided by the build system or that are closely + * related to the format. The header macros may also use ROARING_. + * The CROARING_ prefix is for internal macros that a user is unlikely + * to ever interact with. + */ + +#ifndef INCLUDE_PORTABILITY_H_ +#define INCLUDE_PORTABILITY_H_ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif // _GNU_SOURCE +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS 1 +#endif // __STDC_FORMAT_MACROS + +#ifdef _MSC_VER +#define CROARING_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + */ +#ifdef __clang__ +// clang under visual studio +#define CROARING_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define CROARING_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER +#ifndef CROARING_VISUAL_STUDIO +#define CROARING_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_CLANG_VISUAL_STUDIO +#define CROARING_CLANG_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_REGULAR_VISUAL_STUDIO +#define CROARING_REGULAR_VISUAL_STUDIO 0 +#endif + +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE +#endif + +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) +#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) +#define _XOPEN_SOURCE 700 +#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) + +#ifdef __illumos__ +#define __EXTENSIONS__ +#endif + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above +#ifdef __GLIBC__ +#include <malloc.h> // this should never be needed but there are some reports that it is needed. +#endif + +#ifdef __cplusplus +extern "C" { // portability definitions are in global scope, not a namespace +#endif + +#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 +#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. +#endif + +#if CROARING_REGULAR_VISUAL_STUDIO +#ifndef __restrict__ +#define __restrict__ __restrict +#endif // __restrict__ +#endif // CROARING_REGULAR_VISUAL_STUDIO + +#if defined(__x86_64__) || defined(_M_X64) +// we have an x64 processor +#define CROARING_IS_X64 1 + +#if defined(_MSC_VER) && (_MSC_VER < 1910) +// Old visual studio systems won't support AVX2 well. +#undef CROARING_IS_X64 +#endif + +#if defined(__clang_major__) && (__clang_major__ <= 8) && !defined(__AVX2__) +// Older versions of clang have a bug affecting us +// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces +#undef CROARING_IS_X64 +#endif + +#ifdef ROARING_DISABLE_X64 +#undef CROARING_IS_X64 +#endif +// we include the intrinsic header +#if !CROARING_REGULAR_VISUAL_STUDIO +/* Non-Microsoft C/C++-compatible compiler */ +#include <x86intrin.h> // on some recent GCC, this will declare posix_memalign + +#if CROARING_CLANG_VISUAL_STUDIO + +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. These headers would + * normally guard against such usage, but we carefully included + * <x86intrin.h> (or <intrin.h>) before, so the headers + * are fooled. + */ +// To avoid reordering imports: +// clang-format off +#include <bmiintrin.h> // for _blsr_u64 +#include <lzcntintrin.h> // for __lzcnt64 +#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64) +#include <smmintrin.h> +#include <tmmintrin.h> +#include <avxintrin.h> +#include <avx2intrin.h> +#include <wmmintrin.h> +#if _MSC_VER >= 1920 +// Important: we need the AVX-512 headers: +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512cdintrin.h> +#include <avx512bwintrin.h> +#include <avx512vlintrin.h> +#include <avx512vbmiintrin.h> +#include <avx512vbmi2intrin.h> +#include <avx512vpopcntdqintrin.h> +// clang-format on +#endif // _MSC_VER >= 1920 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +#endif // CROARING_REGULAR_VISUAL_STUDIO +#endif // defined(__x86_64__) || defined(_M_X64) + +#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) +#define CROARING_USENEON +#endif +#if defined(CROARING_USENEON) +#include <arm_neon.h> +#endif + +#if !CROARING_REGULAR_VISUAL_STUDIO +/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline + * assembly */ +#define CROARING_INLINE_ASM 1 +#endif // _MSC_VER + +#if CROARING_REGULAR_VISUAL_STUDIO +/* Microsoft C/C++-compatible compiler */ +#include <intrin.h> + +#ifndef __clang__ // if one compiles with MSVC *with* clang, then these + // intrinsics are defined!!! +#define CROARING_INTRINSICS 1 +// sadly there is no way to check whether we are missing these intrinsics +// specifically. + +/* wrappers for Visual Studio built-ins that look like gcc built-ins + * __builtin_ctzll */ +/** result might be undefined when input_num is zero */ +static inline int roaring_trailing_zeroes(unsigned long long input_num) { + unsigned long index; +#ifdef _WIN64 // highly recommended!!! + _BitScanForward64(&index, input_num); +#else // if we must support 32-bit Windows + if ((uint32_t)input_num != 0) { + _BitScanForward(&index, (uint32_t)input_num); + } else { + _BitScanForward(&index, (uint32_t)(input_num >> 32)); + index += 32; + } +#endif // _WIN64 + return index; +} + +/* wrappers for Visual Studio built-ins that look like gcc built-ins + * __builtin_clzll */ +/** result might be undefined when input_num is zero */ +static inline int roaring_leading_zeroes(unsigned long long input_num) { + unsigned long index; +#ifdef _WIN64 // highly recommended!!! + _BitScanReverse64(&index, input_num); +#else // if we must support 32-bit Windows + if (input_num > 0xFFFFFFFF) { + _BitScanReverse(&index, (uint32_t)(input_num >> 32)); + index += 32; + } else { + _BitScanReverse(&index, (uint32_t)(input_num)); + } +#endif // _WIN64 + return 63 - index; +} + +/* Use #define so this is effective even under /Ob0 (no inline) */ +#define roaring_unreachable __assume(0) +#endif // __clang__ + +#endif // CROARING_REGULAR_VISUAL_STUDIO + +#ifndef CROARING_INTRINSICS +#define CROARING_INTRINSICS 1 +#define roaring_unreachable __builtin_unreachable() +/** result might be undefined when input_num is zero */ +static inline int roaring_trailing_zeroes(unsigned long long input_num) { + return __builtin_ctzll(input_num); +} +/** result might be undefined when input_num is zero */ +static inline int roaring_leading_zeroes(unsigned long long input_num) { + return __builtin_clzll(input_num); +} +#endif + +#if CROARING_REGULAR_VISUAL_STUDIO +#define ALIGNED(x) __declspec(align(x)) +#elif defined(__GNUC__) || defined(__clang__) +#define ALIGNED(x) __attribute__((aligned(x))) +#else +#warning "Warning. Unrecognized compiler." +#define ALIGNED(x) +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define CROARING_WARN_UNUSED __attribute__((warn_unused_result)) +#else +#define CROARING_WARN_UNUSED +#endif + +#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) + +#ifdef CROARING_USENEON +// we can always compute the popcount fast. +#elif (defined(_M_ARM) || defined(_M_ARM64)) && \ + ((defined(_WIN64) || defined(_WIN32)) && \ + defined(CROARING_REGULAR_VISUAL_STUDIO) && \ + CROARING_REGULAR_VISUAL_STUDIO) +// we will need this function: +static inline int roaring_hamming_backup(uint64_t x) { + uint64_t c1 = UINT64_C(0x5555555555555555); + uint64_t c2 = UINT64_C(0x3333333333333333); + uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F); + x -= (x >> 1) & c1; + x = ((x >> 2) & c2) + (x & c2); + x = (x + (x >> 4)) & c4; + x *= UINT64_C(0x0101010101010101); + return x >> 56; +} +#endif + +static inline int roaring_hamming(uint64_t x) { +#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ + CROARING_REGULAR_VISUAL_STUDIO +#ifdef CROARING_USENEON + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +#elif defined(_M_ARM64) + return roaring_hamming_backup(x); + // (int) _CountOneBits64(x); is unavailable +#else // _M_ARM64 + return (int)__popcnt64(x); +#endif // _M_ARM64 +#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ + CROARING_REGULAR_VISUAL_STUDIO +#ifdef _M_ARM + return roaring_hamming_backup(x); + // _CountOneBits is unavailable +#else // _M_ARM + return (int)__popcnt((unsigned int)x) + + (int)__popcnt((unsigned int)(x >> 32)); +#endif // _M_ARM +#else + return __builtin_popcountll(x); +#endif +} + +#ifndef UINT64_C +#define UINT64_C(c) (c##ULL) +#endif // UINT64_C + +#ifndef UINT32_C +#define UINT32_C(c) (c##UL) +#endif // UINT32_C + +#ifdef __cplusplus +} // extern "C" { +#endif // __cplusplus + +// this is almost standard? +#undef STRINGIFY_IMPLEMENTATION_ +#undef STRINGIFY +#define STRINGIFY_IMPLEMENTATION_(a) #a +#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The library should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select +// CROARING_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if CROARING_IS_X64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be +// *outside* of a namespace. +#define CROARING_TARGET_REGION(T) \ + _Pragma(STRINGIFY(clang attribute push(__attribute__((target(T))), \ + apply_to = function))) +#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define CROARING_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) +#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // CROARING_IS_X64 + +// Default target region macros don't do anything. +#ifndef CROARING_TARGET_REGION +#define CROARING_TARGET_REGION(T) +#define CROARING_UNTARGET_REGION +#endif + +#define CROARING_TARGET_AVX2 \ + CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#define CROARING_TARGET_AVX512 \ + CROARING_TARGET_REGION( \ + "avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw," \ + "avx512vbmi2,avx512bitalg,avx512vpopcntdq") +#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION +#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION + +#ifdef __AVX2__ +// No need for runtime dispatching. +// It is unnecessary and harmful to old clang to tag regions. +#undef CROARING_TARGET_AVX2 +#define CROARING_TARGET_AVX2 +#undef CROARING_UNTARGET_AVX2 +#define CROARING_UNTARGET_AVX2 +#endif + +#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && \ + defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && \ + defined(__AVX512VPOPCNTDQ__) +// No need for runtime dispatching. +// It is unnecessary and harmful to old clang to tag regions. +#undef CROARING_TARGET_AVX512 +#define CROARING_TARGET_AVX512 +#undef CROARING_UNTARGET_AVX512 +#define CROARING_UNTARGET_AVX512 +#endif + +// Allow unaligned memory access +#if defined(__GNUC__) || defined(__clang__) +#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) +#else +#define ALLOW_UNALIGNED +#endif + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) +#define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(_WIN32) +#define CROARING_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || \ + defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined + // __ORDER_BIG_ENDIAN__ +#include <machine/endian.h> +#elif defined(sun) || \ + defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) +#include <sys/byteorder.h> +#else // defined(__APPLE__) || defined(__FreeBSD__) + +#ifdef __has_include +#if __has_include(<endian.h>) +#include <endian.h> +#endif //__has_include(<endian.h>) +#endif //__has_include + +#endif // defined(__APPLE__) || defined(__FreeBSD__) + +#ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) +#define CROARING_IS_BIG_ENDIAN 0 +#endif + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define CROARING_IS_BIG_ENDIAN 0 +#else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define CROARING_IS_BIG_ENDIAN 1 +#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#endif + +// Host <-> big endian conversion. +#if CROARING_IS_BIG_ENDIAN +#define croaring_htobe64(x) (x) + +#elif defined(_WIN32) || defined(_WIN64) // CROARING_IS_BIG_ENDIAN +#include <stdlib.h> +#define croaring_htobe64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) // CROARING_IS_BIG_ENDIAN +#include <libkern/OSByteOrder.h> +#define croaring_htobe64(x) OSSwapInt64(x) + +#elif defined(__has_include) && \ + __has_include(<byteswap.h>) // CROARING_IS_BIG_ENDIAN +#include <byteswap.h> +#define croaring_htobe64(x) __bswap_64(x) + +#else // CROARING_IS_BIG_ENDIAN +// Gets compiled to bswap or equivalent on most compilers. +#define croaring_htobe64(x) \ + (((x & 0x00000000000000FFULL) << 56) | \ + ((x & 0x000000000000FF00ULL) << 40) | \ + ((x & 0x0000000000FF0000ULL) << 24) | \ + ((x & 0x00000000FF000000ULL) << 8) | ((x & 0x000000FF00000000ULL) >> 8) | \ + ((x & 0x0000FF0000000000ULL) >> 24) | \ + ((x & 0x00FF000000000000ULL) >> 40) | \ + ((x & 0xFF00000000000000ULL) >> 56)) +#endif // CROARING_IS_BIG_ENDIAN +#define croaring_be64toh(x) croaring_htobe64(x) +// End of host <-> big endian conversion. + +// Defines for the possible CROARING atomic implementations +#define CROARING_ATOMIC_IMPL_NONE 1 +#define CROARING_ATOMIC_IMPL_CPP 2 +#define CROARING_ATOMIC_IMPL_C 3 +#define CROARING_ATOMIC_IMPL_C_WINDOWS 4 + +// If the use has forced a specific implementation, use that, otherwise, +// figure out the best implementation we can use. +#if !defined(CROARING_ATOMIC_IMPL) +#if defined(__cplusplus) && __cplusplus >= 201103L +#ifdef __has_include +#if __has_include(<atomic>) +#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP +#endif //__has_include(<atomic>) +#else + // We lack __has_include to check: +#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP +#endif //__has_include +#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) +#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C +#elif CROARING_REGULAR_VISUAL_STUDIO + // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/ +#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS +#endif +#endif // !defined(CROARING_ATOMIC_IMPL) + +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +#include <stdatomic.h> +typedef _Atomic(uint32_t) croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + // Increasing the reference counter can always be done with + // memory_order_relaxed: New references to an object can only be formed from + // an existing reference, and passing an existing reference from one thread + // to another must already provide any required synchronization. + atomic_fetch_add_explicit(val, 1, memory_order_relaxed); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // It is important to enforce any possible access to the object in one + // thread (through an existing reference) to happen before deleting the + // object in a different thread. This is achieved by a "release" operation + // after dropping a reference (any access to the object through this + // reference must obviously happened before), and an "acquire" operation + // before deleting the object. + bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1; + if (is_zero) { + atomic_thread_fence(memory_order_acquire); + } + return is_zero; +} + +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + return atomic_load_explicit(val, memory_order_relaxed); +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP +#include <atomic> +typedef std::atomic<uint32_t> croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + val->fetch_add(1, std::memory_order_relaxed); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // See above comments on the c11 atomic implementation for memory ordering + bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1; + if (is_zero) { + std::atomic_thread_fence(std::memory_order_acquire); + } + return is_zero; +} + +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + return val->load(std::memory_order_relaxed); +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS +#include <intrin.h> +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) + +// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and +// overflow is defined to wrap, so we can pretend it is a uint32_t for our case +typedef volatile long croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + _InterlockedIncrement(val); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + return _InterlockedDecrement(val) == 0; +} + +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + // Per + // https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access + // > Simple reads and writes to properly-aligned 32-bit variables are atomic + // > operations. In other words, you will not end up with only one portion + // > of the variable updated; all bits are updated in an atomic fashion. + return *val; +} +//#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE +#else +#include <assert.h> +typedef uint32_t croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + *val += 1; +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + assert(*val > 0); + *val -= 1; + return val == 0; +} + +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + return *val; +} +//#else +//#error "Unknown atomic implementation" +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define CROARING_DEPRECATED __attribute__((deprecated)) +#else +#define CROARING_DEPRECATED +#endif // defined(__GNUC__) || defined(__clang__) + +// We need portability.h to be included first, +// but we also always want isadetection.h to be +// included (right after). +// See https://github.com/RoaringBitmap/CRoaring/issues/394 +// There is no scenario where we want portability.h to +// be included, but not isadetection.h: the latter is a +// strict requirement. +#endif /* INCLUDE_PORTABILITY_H_ */ +/* end file include/roaring/portability.h */ +/* begin file include/roaring/bitset/bitset.h */ +#ifndef CBITSET_BITSET_H +#define CBITSET_BITSET_H + +// For compatibility with MSVC with the use of `restrict` +#if (__STDC_VERSION__ >= 199901L) || \ + (defined(__GNUC__) && defined(__STDC_VERSION__)) +#define CBITSET_RESTRICT restrict +#else +#define CBITSET_RESTRICT +#endif // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && + // defined(__STDC_VERSION__ )) + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace api { +#endif + +struct bitset_s { + uint64_t *CBITSET_RESTRICT array; + /* For simplicity and performance, we prefer to have a size and a capacity + * that is a multiple of 64 bits. Thus we only track the size and the + * capacity in terms of 64-bit words allocated */ + size_t arraysize; + size_t capacity; +}; + +typedef struct bitset_s bitset_t; + +/* Create a new bitset. Return NULL in case of failure. */ +bitset_t *bitset_create(void); + +/* Create a new bitset able to contain size bits. Return NULL in case of + * failure. */ +bitset_t *bitset_create_with_capacity(size_t size); + +/* Free memory. */ +void bitset_free(bitset_t *bitset); + +/* Set all bits to zero. */ +void bitset_clear(bitset_t *bitset); + +/* Set all bits to one. */ +void bitset_fill(bitset_t *bitset); + +/* Create a copy */ +bitset_t *bitset_copy(const bitset_t *bitset); + +/* For advanced users: Resize the bitset so that it can support newarraysize * + * 64 bits. Return true in case of success, false for failure. Pad with zeroes + * new buffer areas if requested. */ +bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes); + +/* returns how many bytes of memory the backend buffer uses */ +static inline size_t bitset_size_in_bytes(const bitset_t *bitset) { + return bitset->arraysize * sizeof(uint64_t); +} + +/* returns how many bits can be accessed */ +static inline size_t bitset_size_in_bits(const bitset_t *bitset) { + return bitset->arraysize * 64; +} + +/* returns how many words (64-bit) of memory the backend buffer uses */ +static inline size_t bitset_size_in_words(const bitset_t *bitset) { + return bitset->arraysize; +} + +/* For advanced users: Grow the bitset so that it can support newarraysize * 64 + * bits with padding. Return true in case of success, false for failure. */ +bool bitset_grow(bitset_t *bitset, size_t newarraysize); + +/* attempts to recover unused memory, return false in case of + * roaring_reallocation failure */ +bool bitset_trim(bitset_t *bitset); + +/* shifts all bits by 's' positions so that the bitset representing values + * 1,2,10 would represent values 1+s, 2+s, 10+s */ +void bitset_shift_left(bitset_t *bitset, size_t s); + +/* shifts all bits by 's' positions so that the bitset representing values + * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */ +void bitset_shift_right(bitset_t *bitset, size_t s); + +/* Set the ith bit. Attempts to resize the bitset if needed (may silently fail) + */ +static inline void bitset_set(bitset_t *bitset, size_t i) { + size_t shiftedi = i / 64; + if (shiftedi >= bitset->arraysize) { + if (!bitset_grow(bitset, shiftedi + 1)) { + return; + } + } + bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64); +} + +/* Set the ith bit to the specified value. Attempts to resize the bitset if + * needed (may silently fail) */ +static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) { + size_t shiftedi = i / 64; + uint64_t mask = ((uint64_t)1) << (i % 64); + uint64_t dynmask = ((uint64_t)flag) << (i % 64); + if (shiftedi >= bitset->arraysize) { + if (!bitset_grow(bitset, shiftedi + 1)) { + return; + } + } + uint64_t w = bitset->array[shiftedi]; + w &= ~mask; + w |= dynmask; + bitset->array[shiftedi] = w; +} + +/* Get the value of the ith bit. */ +static inline bool bitset_get(const bitset_t *bitset, size_t i) { + size_t shiftedi = i / 64; + if (shiftedi >= bitset->arraysize) { + return false; + } + return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0; +} + +/* Count number of bits set. */ +size_t bitset_count(const bitset_t *bitset); + +/* Find the index of the first bit set. Or zero if the bitset is empty. */ +size_t bitset_minimum(const bitset_t *bitset); + +/* Find the index of the last bit set. Or zero if the bitset is empty. */ +size_t bitset_maximum(const bitset_t *bitset); + +/* compute the union in-place (to b1), returns true if successful, to generate a + * new bitset first call bitset_copy */ +bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* report the size of the union (without materializing it) */ +size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the intersection in-place (to b1), to generate a new bitset first + * call bitset_copy */ +void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* report the size of the intersection (without materializing it) */ +size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* returns true if the bitsets contain no common elements */ +bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* returns true if the bitsets contain any common elements */ +bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* returns true if b1 contains all of the set bits of b2 */ +bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the difference in-place (to b1), to generate a new bitset first call + * bitset_copy */ +void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the size of the difference */ +size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the symmetric difference in-place (to b1), return true if successful, + * to generate a new bitset first call bitset_copy */ +bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the size of the symmetric difference */ +size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* iterate over the set bits + like so : + for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) { + //..... + } + */ +static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) { + size_t x = *i / 64; + if (x >= bitset->arraysize) { + return false; + } + uint64_t w = bitset->array[x]; + w >>= (*i & 63); + if (w != 0) { + *i += roaring_trailing_zeroes(w); + return true; + } + x++; + while (x < bitset->arraysize) { + w = bitset->array[x]; + if (w != 0) { + *i = x * 64 + roaring_trailing_zeroes(w); + return true; + } + x++; + } + return false; +} + +/* iterate over the set bits + like so : + size_t buffer[256]; + size_t howmany = 0; + for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, + &startfrom)) > 0 ; startfrom++) { + //..... + } + */ +static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, + size_t capacity, size_t *startfrom) { + if (capacity == 0) return 0; // sanity check + size_t x = *startfrom / 64; + if (x >= bitset->arraysize) { + return 0; // nothing more to iterate over + } + uint64_t w = bitset->array[x]; + w >>= (*startfrom & 63); + size_t howmany = 0; + size_t base = x << 6; + while (howmany < capacity) { + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = roaring_trailing_zeroes(w); + buffer[howmany++] = r + base; + if (howmany == capacity) goto end; + w ^= t; + } + x += 1; + if (x == bitset->arraysize) { + break; + } + base += 64; + w = bitset->array[x]; + } +end: + if (howmany > 0) { + *startfrom = buffer[howmany - 1]; + } + return howmany; +} + +typedef bool (*bitset_iterator)(size_t value, void *param); + +// return true if uninterrupted +static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, + void *ptr) { + size_t base = 0, i; + for (i = 0; i < b->arraysize; ++i) { + uint64_t w = b->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = roaring_trailing_zeroes(w); + if (!iterator(r + base, ptr)) return false; + w ^= t; + } + base += 64; + } + return true; +} + +static inline void bitset_print(const bitset_t *b) { + size_t i; + printf("{"); + for (i = 0; bitset_next_set_bit(b, &i); i++) { + printf("%zu, ", i); + } + printf("}"); +} + +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace api { +#endif + +#endif +/* end file include/roaring/bitset/bitset.h */ /* begin file include/roaring/roaring.h */ /* * An implementation of Roaring Bitmaps in C. @@ -181,12 +1084,16 @@ typedef struct roaring_statistics_s { #define ROARING_H #include <stdbool.h> -#include <stdint.h> #include <stddef.h> // for `size_t` +#include <stdint.h> + +// Include other headers after roaring_types.h #ifdef __cplusplus -extern "C" { namespace roaring { namespace api { +extern "C" { +namespace roaring { +namespace api { #endif typedef struct roaring_bitmap_s { @@ -206,8 +1113,9 @@ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); * Returns NULL if the allocation fails. * Client is responsible for calling `roaring_bitmap_free()`. */ -static inline roaring_bitmap_t *roaring_bitmap_create(void) - { return roaring_bitmap_create_with_capacity(0); } +static inline roaring_bitmap_t *roaring_bitmap_create(void) { + return roaring_bitmap_create_with_capacity(0); +} /** * Initialize a roaring bitmap structure in memory controlled by client. @@ -221,13 +1129,14 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); * The bitmap will be in a "clear" state, with no auxiliary allocations. * Since this performs no allocations, the function will not fail. */ -static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) - { roaring_bitmap_init_with_capacity(r, 0); } +static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) { + roaring_bitmap_init_with_capacity(r, 0); +} /** * Add all the values between min (included) and max (excluded) that are at a * distance k*step from min. -*/ + */ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); @@ -245,11 +1154,10 @@ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); * do so for all of your bitmaps, since interactions between bitmaps with and * without COW is unsafe. */ -static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) { +static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) { return r->high_low_container.flags & ROARING_FLAG_COW; } -static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, - bool cow) { +static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) { if (cow) { r->high_low_container.flags |= ROARING_FLAG_COW; } else { @@ -259,18 +1167,58 @@ static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset); - -#ifdef NDPI_ENABLE_DEBUG_MESSAGES /** * Describe the inner structure of the bitmap. */ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); -#endif /** * Creates a new bitmap from a list of uint32_t integers + * + * This function is deprecated, use `roaring_bitmap_from` instead, which + * doesn't require the number of elements to be passed in. + * + * @see roaring_bitmap_from */ -roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); +CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); + +#ifdef __cplusplus +/** + * Creates a new bitmap which contains all values passed in as arguments. + * + * To create a bitmap from a variable number of arguments, use the + * `roaring_bitmap_of_ptr` function instead. + */ +// Use an immediately invoked closure, capturing by reference +// (in case __VA_ARGS__ refers to context outside the closure) +// Include a 0 at the beginning of the array to make the array length > 0 +// (zero sized arrays are not valid in standard c/c++) +#define roaring_bitmap_from(...) \ + [&]() { \ + const uint32_t roaring_bitmap_from_array[] = {0, __VA_ARGS__}; \ + return roaring_bitmap_of_ptr((sizeof(roaring_bitmap_from_array) / \ + sizeof(roaring_bitmap_from_array[0])) - \ + 1, \ + &roaring_bitmap_from_array[1]); \ + }() +#else +/** + * Creates a new bitmap which contains all values passed in as arguments. + * + * To create a bitmap from a variable number of arguments, use the + * `roaring_bitmap_of_ptr` function instead. + */ +// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof +// expression, which is an unevaluated context, so it's even safe in the case +// where expressions passed have side effects (roaring64_bitmap_from(my_func(), +// ++i)) +// Include a 0 at the beginning of the array to make the array length > 0 +// (zero sized arrays are not valid in standard c/c++) +#define roaring_bitmap_from(...) \ + roaring_bitmap_of_ptr( \ + (sizeof((const uint32_t[]){0, __VA_ARGS__}) / sizeof(uint32_t)) - 1, \ + &((const uint32_t[]){0, __VA_ARGS__})[1]) +#endif /** * Copies a bitmap (this does memory allocation). @@ -285,16 +1233,18 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); * * It might be preferable and simpler to call roaring_bitmap_copy except * that roaring_bitmap_overwrite can save on memory allocations. + * + * Returns true if successful, or false if there was an error. On failure, + * the dest bitmap is left in a valid, empty state (even if it was not empty + * before). */ bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, const roaring_bitmap_t *src); -#ifdef NDPI_ENABLE_DEBUG_MESSAGES /** * Print the content of the bitmap. */ void roaring_bitmap_printf(const roaring_bitmap_t *r); -#endif /** * Computes the intersection between two bitmaps and returns new bitmap. The @@ -321,10 +1271,10 @@ bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** - * Check whether a bitmap and a closed range intersect. + * Check whether a bitmap and an open range intersect. */ -bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, - uint64_t x, uint64_t y); +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, + uint64_t y); /** * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto @@ -502,15 +1452,15 @@ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); /** * Add all values in range [min, max] */ -void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, - uint32_t min, uint32_t max); +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max); /** * Add all values in range [min, max) */ -static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, - uint64_t min, uint64_t max) { - if(max == min) return; +static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, + uint64_t max) { + if (max <= min) return; roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -522,15 +1472,15 @@ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); /** * Remove all values in range [min, max] */ -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, - uint32_t min, uint32_t max); +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max); /** * Remove all values in range [min, max) */ -static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, - uint64_t min, uint64_t max) { - if(max == min) return; +static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, + uint64_t max) { + if (max <= min) return; roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -556,12 +1506,11 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); * to range_end (excluded) is present */ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, - uint64_t range_start, - uint64_t range_end); + uint64_t range_start, uint64_t range_end); /** - * Check if an items is present, using context from a previous insert for speed - * optimization. + * Check if an items is present, using context from a previous insert or search + * for speed optimization. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first @@ -591,11 +1540,10 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_end); /** -* Returns true if the bitmap is empty (cardinality is zero). -*/ + * Returns true if the bitmap is empty (cardinality is zero). + */ bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); - /** * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap * was initialized in client memory via roaring_bitmap_init(), then a call to @@ -612,9 +1560,27 @@ void roaring_bitmap_clear(roaring_bitmap_t *r); */ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); +/** + * Store the bitmap to a bitset. This can be useful for people + * who need the performance and simplicity of a standard bitset. + * We assume that the input bitset is originally empty (does not + * have any set bit). + * + * bitset_t * out = bitset_create(); + * // if the bitset has content in it, call "bitset_clear(out)" + * bool success = roaring_bitmap_to_bitset(mybitmap, out); + * // on failure, success will be false. + * // You can then query the bitset: + * bool is_present = bitset_get(out, 10011 ); + * // you must free the memory: + * bitset_free(out); + * + */ +bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset); /** - * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`. + * Convert the bitmap to a sorted array from `offset` by `limit`, output in + * `ans`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * @@ -622,9 +1588,8 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); * * Return false in case of failure (e.g., insufficient memory) */ -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, - size_t offset, size_t limit, - uint32_t *ans); +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, + size_t limit, uint32_t *ans); /** * Remove run-length encoding even when it is more space efficient. @@ -657,8 +1622,9 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -668,12 +1634,30 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); * (See `roaring_bitmap_portable_deserialize()` if you want a format that's * compatible with Java and Go implementations). * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); /** + * Use with `roaring_bitmap_serialize()`. + * + * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's + * compatible with Java and Go implementations). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + * + * The difference with `roaring_bitmap_deserialize()` is that this function + * checks that the input buffer is a valid bitmap. If the buffer is too small, + * NULL is returned. + */ +roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, + size_t maxbytes); + +/** * How many bytes are required to serialize this bitmap (NOT compatible * with Java and Go versions) */ @@ -689,9 +1673,10 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec -* - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); @@ -702,8 +1687,23 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * The function itself is safe in the sense that it will not cause buffer + * overflows. However, for correct operations, it is assumed that the bitmap + * read was once serialized from a valid bitmap (i.e., it follows the format + * specification). If you provided an incorrect input (garbage), then the bitmap + * read may not be in a valid state and following operations may not lead to + * sensible results. In particular, the serialized array containers need to be + * in sorted order, and the run containers should be in sorted non-overlapping + * order. This is is guaranteed to happen when serializing an existing bitmap, + * but not for random inputs. + * + * You may use roaring_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. You may also use other strategies to check for + * corrupted inputs (e.g., checksums). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); @@ -724,8 +1724,9 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); @@ -757,8 +1758,9 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -790,8 +1792,9 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -806,8 +1809,9 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. * - * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), - * the data format is going to be big-endian and not compatible with little-endian systems. + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. */ const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); @@ -950,6 +1954,27 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); /** + * roaring_bitmap_rank_many is an `Bulk` version of `roaring_bitmap_rank` + * it puts rank value of each element in `[begin .. end)` to `ans[]` + * + * the values in `[begin .. end)` must be sorted in Ascending order; + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * + * ans = malloc((end-begin) * sizeof(uint64_t)); + */ +void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t *begin, + const uint32_t *end, uint64_t *ans); + +/** + * Returns the index of x in the given roaring bitmap. + * If the roaring bitmap doesn't contain x , this function will return -1. + * The difference with rank function is that this function will return -1 when x + * is not the element of roaring bitmap, but the rank function will return a + * non-negative number. + */ +int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x); + +/** * Returns the smallest value in the set, or UINT32_MAX if the set is empty. */ uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); @@ -968,136 +1993,215 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat); +/** + * Perform internal consistency checks. Returns true if the bitmap is + * consistent. It may be useful to call this after deserializing bitmaps from + * untrusted sources. If roaring_bitmap_internal_validate returns true, then the + * bitmap should be consistent and can be trusted not to cause crashes or memory + * corruption. + * + * Note that some operations intentionally leave bitmaps in an inconsistent + * state temporarily, for example, `roaring_bitmap_lazy_*` functions, until + * `roaring_bitmap_repair_after_lazy` is called. + * + * If reason is non-null, it will be set to a string describing the first + * inconsistency found if any. + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, + const char **reason); + /********************* * What follows is code use to iterate through values in a roaring bitmap roaring_bitmap_t *r =... roaring_uint32_iterator_t i; -roaring_create_iterator(r, &i); +roaring_iterator_create(r, &i); while(i.has_value) { printf("value = %d\n", i.current_value); - roaring_advance_uint32_iterator(&i); + roaring_uint32_iterator_advance(&i); } Obviously, if you modify the underlying bitmap, the iterator becomes invalid. So don't. */ +/** + * A struct used to keep iterator state. Users should only access + * `current_value` and `has_value`, the rest of the type should be treated as + * opaque. + */ typedef struct roaring_uint32_iterator_s { - const roaring_bitmap_t *parent; // owner - int32_t container_index; // point to the current container index - int32_t in_container_index; // for bitset and array container, this is out - // index - int32_t run_index; // for run container, this points at the run + const roaring_bitmap_t *parent; // Owner + const ROARING_CONTAINER_T *container; // Current container + uint8_t typecode; // Typecode of current container + int32_t container_index; // Current container index + uint32_t highbits; // High 16 bits of the current value + roaring_container_iterator_t container_it; uint32_t current_value; bool has_value; - - const ROARING_CONTAINER_T - *container; // should be: - // parent->high_low_container.containers[container_index]; - uint8_t typecode; // should be: - // parent->high_low_container.typecodes[container_index]; - uint32_t highbits; // should be: - // parent->high_low_container.keys[container_index]) << - // 16; - } roaring_uint32_iterator_t; /** - * Initialize an iterator object that can be used to iterate through the - * values. If there is a value, then this iterator points to the first value - * and `it->has_value` is true. The value is in `it->current_value`. + * Initialize an iterator object that can be used to iterate through the values. + * If there is a value, then this iterator points to the first value and + * `it->has_value` is true. The value is in `it->current_value`. */ -void roaring_init_iterator(const roaring_bitmap_t *r, +void roaring_iterator_init(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); +/** DEPRECATED, use `roaring_iterator_init`. */ +CROARING_DEPRECATED static inline void roaring_init_iterator( + const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) { + roaring_iterator_init(r, newit); +} + /** - * Initialize an iterator object that can be used to iterate through the - * values. If there is a value, then this iterator points to the last value - * and `it->has_value` is true. The value is in `it->current_value`. + * Initialize an iterator object that can be used to iterate through the values. + * If there is a value, then this iterator points to the last value and + * `it->has_value` is true. The value is in `it->current_value`. */ -void roaring_init_iterator_last(const roaring_bitmap_t *r, +void roaring_iterator_init_last(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); +/** DEPRECATED, use `roaring_iterator_init_last`. */ +CROARING_DEPRECATED static inline void roaring_init_iterator_last( + const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) { + roaring_iterator_init_last(r, newit); +} + /** * Create an iterator object that can be used to iterate through the values. * Caller is responsible for calling `roaring_free_iterator()`. * - * The iterator is initialized (this function calls `roaring_init_iterator()`) + * The iterator is initialized (this function calls `roaring_iterator_init()`) * If there is a value, then this iterator points to the first value and * `it->has_value` is true. The value is in `it->current_value`. */ -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); +roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r); + +/** DEPRECATED, use `roaring_iterator_create`. */ +CROARING_DEPRECATED static inline roaring_uint32_iterator_t * +roaring_create_iterator(const roaring_bitmap_t *r) { + return roaring_iterator_create(r); +} /** -* Advance the iterator. If there is a new value, then `it->has_value` is true. -* The new value is in `it->current_value`. Values are traversed in increasing -* orders. For convenience, returns `it->has_value`. -*/ -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); + * Advance the iterator. If there is a new value, then `it->has_value` is true. + * The new value is in `it->current_value`. Values are traversed in increasing + * orders. For convenience, returns `it->has_value`. + * + * Once `it->has_value` is false, `roaring_uint32_iterator_advance` should not + * be called on the iterator again. Calling `roaring_uint32_iterator_previous` + * is allowed. + */ +bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it); + +/** DEPRECATED, use `roaring_uint32_iterator_advance`. */ +CROARING_DEPRECATED static inline bool roaring_advance_uint32_iterator( + roaring_uint32_iterator_t *it) { + return roaring_uint32_iterator_advance(it); +} /** -* Decrement the iterator. If there's a new value, then `it->has_value` is true. -* The new value is in `it->current_value`. Values are traversed in decreasing -* order. For convenience, returns `it->has_value`. -*/ -bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); + * Decrement the iterator. If there's a new value, then `it->has_value` is true. + * The new value is in `it->current_value`. Values are traversed in decreasing + * order. For convenience, returns `it->has_value`. + * + * Once `it->has_value` is false, `roaring_uint32_iterator_previous` should not + * be called on the iterator again. Calling `roaring_uint32_iterator_advance` is + * allowed. + */ +bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it); + +/** DEPRECATED, use `roaring_uint32_iterator_previous`. */ +CROARING_DEPRECATED static inline bool roaring_previous_uint32_iterator( + roaring_uint32_iterator_t *it) { + return roaring_uint32_iterator_previous(it); +} /** * Move the iterator to the first value >= `val`. If there is a such a value, * then `it->has_value` is true. The new value is in `it->current_value`. * For convenience, returns `it->has_value`. */ -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, +bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val); +/** DEPRECATED, use `roaring_uint32_iterator_move_equalorlarger`. */ +CROARING_DEPRECATED static inline bool +roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val) { + return roaring_uint32_iterator_move_equalorlarger(it, val); +} + /** * Creates a copy of an iterator. * Caller must free it. */ -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( +roaring_uint32_iterator_t *roaring_uint32_iterator_copy( const roaring_uint32_iterator_t *it); +/** DEPRECATED, use `roaring_uint32_iterator_copy`. */ +CROARING_DEPRECATED static inline roaring_uint32_iterator_t * +roaring_copy_uint32_iterator(const roaring_uint32_iterator_t *it) { + return roaring_uint32_iterator_copy(it); +} + /** - * Free memory following `roaring_create_iterator()` + * Free memory following `roaring_iterator_create()` */ -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); +void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it); + +/** DEPRECATED, use `roaring_uint32_iterator_free`. */ +CROARING_DEPRECATED static inline void roaring_free_uint32_iterator( + roaring_uint32_iterator_t *it) { + roaring_uint32_iterator_free(it); +} /* * Reads next ${count} values from iterator into user-supplied ${buf}. * Returns the number of read elements. - * This number can be smaller than ${count}, which means that iterator is drained. + * This number can be smaller than ${count}, which means that iterator is + * drained. * * This function satisfies semantics of iteration and can be used together with * other iterator functions. * - first value is copied from ${it}->current_value * - after function returns, iterator is positioned at the next element */ -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, - uint32_t* buf, uint32_t count); +uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, + uint32_t *buf, uint32_t count); + +/** DEPRECATED, use `roaring_uint32_iterator_read`. */ +CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator( + roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) { + return roaring_uint32_iterator_read(it, buf, count); +} #ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace api { +} +} +} // extern "C" { namespace roaring { namespace api { #endif -#endif /* ROARING_H */ +#endif /* ROARING_H */ #ifdef __cplusplus - /** - * Best practices for C++ headers is to avoid polluting global scope. - * But for C compatibility when just `roaring.h` is included building as - * C++, default to global access for the C public API. - * - * BUT when `roaring.hh` is included instead, it sets this flag. That way - * explicit namespacing must be used to get the C functions. - * - * This is outside the include guard so that if you include BOTH headers, - * the order won't matter; you still get the global definitions. - */ - #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) - using namespace ::roaring::api; - #endif +/** + * Best practices for C++ headers is to avoid polluting global scope. + * But for C compatibility when just `roaring.h` is included building as + * C++, default to global access for the C public API. + * + * BUT when `roaring.hh` is included instead, it sets this flag. That way + * explicit namespacing must be used to get the C functions. + * + * This is outside the include guard so that if you include BOTH headers, + * the order won't matter; you still get the global definitions. + */ +#if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) +using namespace ::roaring::api; +#endif #endif /* end file include/roaring/roaring.h */ /* begin file include/roaring/memory.h */ @@ -1141,3 +2245,660 @@ void roaring_aligned_free(void*); #endif // INCLUDE_ROARING_MEMORY_H_ /* end file include/roaring/memory.h */ +/* begin file include/roaring/roaring64.h */ +#ifndef ROARING64_H +#define ROARING64_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace api { +#endif + +typedef struct roaring64_bitmap_s roaring64_bitmap_t; +typedef struct roaring64_leaf_s roaring64_leaf_t; +typedef struct roaring64_iterator_s roaring64_iterator_t; + +/** + * A bit of context usable with `roaring64_bitmap_*_bulk()` functions. + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap (unless re-initialized to + * zero), and any modification to a bitmap (other than modifications performed + * with `_bulk()` functions with the context passed) will invalidate any + * contexts associated with that bitmap. + */ +typedef struct roaring64_bulk_context_s { + uint8_t high_bytes[6]; + roaring64_leaf_t *leaf; +} roaring64_bulk_context_t; + +/** + * Dynamically allocates a new bitmap (initially empty). + * Client is responsible for calling `roaring64_bitmap_free()`. + */ +roaring64_bitmap_t *roaring64_bitmap_create(void); +void roaring64_bitmap_free(roaring64_bitmap_t *r); + +/** + * Returns a copy of a bitmap. + */ +roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r); + +/** + * Creates a new bitmap of a pointer to N 64-bit integers. + */ +roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, + const uint64_t *vals); + +#ifdef __cplusplus +/** + * Creates a new bitmap which contains all values passed in as arguments. + * + * To create a bitmap from a variable number of arguments, use the + * `roaring64_bitmap_of_ptr` function instead. + */ +// Use an immediately invoked closure, capturing by reference +// (in case __VA_ARGS__ refers to context outside the closure) +// Include a 0 at the beginning of the array to make the array length > 0 +// (zero sized arrays are not valid in standard c/c++) +#define roaring64_bitmap_from(...) \ + [&]() { \ + const uint64_t roaring64_bitmap_from_array[] = {0, __VA_ARGS__}; \ + return roaring64_bitmap_of_ptr( \ + (sizeof(roaring64_bitmap_from_array) / \ + sizeof(roaring64_bitmap_from_array[0])) - \ + 1, \ + &roaring64_bitmap_from_array[1]); \ + }() +#else +/** + * Creates a new bitmap which contains all values passed in as arguments. + * + * To create a bitmap from a variable number of arguments, use the + * `roaring64_bitmap_of_ptr` function instead. + */ +// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof +// expression, which is an unevaluated context, so it's even safe in the case +// where expressions passed have side effects (roaring64_bitmap_from(my_func(), +// ++i)) +// Include a 0 at the beginning of the array to make the array length > 0 +// (zero sized arrays are not valid in standard c/c++) +#define roaring64_bitmap_from(...) \ + roaring64_bitmap_of_ptr( \ + (sizeof((const uint64_t[]){0, __VA_ARGS__}) / sizeof(uint64_t)) - 1, \ + &((const uint64_t[]){0, __VA_ARGS__})[1]) +#endif + +/** + * Create a new bitmap containing all the values in [min, max) that are at a + * distance k*step from min. + */ +roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, + uint64_t step); + +/** + * Adds the provided value to the bitmap. + */ +void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val); + +/** + * Adds the provided value to the bitmap. + * Returns true if a new value was added, false if the value already existed. + */ +bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val); + +/** + * Add an item, using context from a previous insert for faster insertion. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, uint64_t val); + +/** + * Add `n_args` values from `vals`, faster than repeatedly calling + * `roaring64_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same high 48 bits of the value as consecutive elements in + * `vals`. + */ +void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals); + +/** + * Add all values in range [min, max). + */ +void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Add all values in range [min, max]. + */ +void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Removes a value from the bitmap if present. + */ +void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val); + +/** + * Removes a value from the bitmap if present, returns true if the value was + * removed and false if the value was not present. + */ +bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val); + +/** + * Remove an item, using context from a previous insert for faster removal. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val); + +/** + * Remove `n_args` values from `vals`, faster than repeatedly calling + * `roaring64_bitmap_remove()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same high 48 bits of the value as consecutive elements in + * `vals`. + */ +void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals); + +/** + * Remove all values in range [min, max). + */ +void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Remove all values in range [min, max]. + */ +void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Returns true if the provided value is present. + */ +bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val); + +/** + * Returns true if all values in the range [min, max) are present. + */ +bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Check if an item is present using context from a previous insert or search + * for faster search. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val); + +/** + * Selects the element at index 'rank' where the smallest element is at index 0. + * If the size of the bitmap is strictly greater than rank, then this function + * returns true and sets element to the element of given rank. Otherwise, it + * returns false. + */ +bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, + uint64_t *element); + +/** + * Returns the number of integers that are smaller or equal to x. Thus if x is + * the first element, this function will return 1. If x is smaller than the + * smallest element, this function will return 0. + * + * The indexing convention differs between roaring64_bitmap_select and + * roaring64_bitmap_rank: roaring_bitmap64_select refers to the smallest value + * as having index 0, whereas roaring64_bitmap_rank returns 1 when ranking + * the smallest value. + */ +uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val); + +/** + * Returns true if the given value is in the bitmap, and sets `out_index` to the + * (0-based) index of the value in the bitmap. Returns false if the value is not + * in the bitmap. + */ +bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, + uint64_t *out_index); + +/** + * Returns the number of values in the bitmap. + */ +uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r); + +/** + * Returns the number of elements in the range [min, max). + */ +uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + +/** + * Returns true if the bitmap is empty (cardinality is zero). + */ +bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r); + +/** + * Returns the smallest value in the set, or UINT64_MAX if the set is empty. + */ +uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r); + +/** + * Returns the largest value in the set, or 0 if empty. + */ +uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); + +/** + * Returns true if the result has at least one run container. + */ +bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); + +/** + * Perform internal consistency checks. + * + * Returns true if the bitmap is consistent. It may be useful to call this + * after deserializing bitmaps from untrusted sources. If + * roaring64_bitmap_internal_validate returns true, then the bitmap is + * consistent and can be trusted not to cause crashes or memory corruption. + * + * If reason is non-null, it will be set to a string describing the first + * inconsistency found if any. + */ +bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, + const char **reason); + +/** + * Return true if the two bitmaps contain the same elements. + */ +bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2. + */ +bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2, and r2 is strictly + * greater than r1. + */ +bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the intersection between two bitmaps and returns new bitmap. The + * caller is responsible for free-ing the result. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may + * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary + * bitmaps. + */ +roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the intersection between two bitmaps. + */ +uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_and()`, modifies `r1`. `r1` and `r2` + * are allowed to be equal. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmaps. + */ +void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Check whether two bitmaps intersect. + */ +bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Check whether a bitmap intersects the range [min, max). + */ +bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + +/** + * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto + * distance, or the Jaccard similarity coefficient) + * + * The Jaccard index is undefined if both bitmaps are empty. + */ +double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the union between two bitmaps and returns new bitmap. The caller is + * responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the union between two bitmaps. + */ +uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_or(), modifies `r1`. + */ +void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the symmetric difference (xor) between two bitmaps and returns a new + * bitmap. The caller is responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the symmetric difference (xor) between two bitmaps. + */ +uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_xor()`, modifies `r1`. `r1` and `r2` + * are not allowed to be equal (that would result in an empty bitmap). + */ +void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the difference (andnot) between two bitmaps and returns a new + * bitmap. The caller is responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the difference (andnot) between two bitmaps. + */ +uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_andnot()`, modifies `r1`. `r1` and `r2` + * are not allowed to be equal (that would result in an empty bitmap). + */ +void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Compute the negation of the bitmap in the interval [min, max). + * The number of negated values is `max - min`. Areas outside the range are + * passed through unchanged. + */ +roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + +/** + * Compute the negation of the bitmap in the interval [min, max]. + * The number of negated values is `max - min + 1`. Areas outside the range are + * passed through unchanged. + */ +roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + +/** + * In-place version of `roaring64_bitmap_flip`. Compute the negation of the + * bitmap in the interval [min, max). The number of negated values is `max - + * min`. Areas outside the range are passed through unchanged. + */ +void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); +/** + * In-place version of `roaring64_bitmap_flip_closed`. Compute the negation of + * the bitmap in the interval [min, max]. The number of negated values is `max - + * min + 1`. Areas outside the range are passed through unchanged. + */ +void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); +/** + * How many bytes are required to serialize this bitmap. + * + * This is meant to be compatible with other languages: + * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + */ +size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r); + +/** + * Write a bitmap to a buffer. The output buffer should refer to at least + * `roaring64_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. + * + * Returns how many bytes were written, which should match + * `roaring64_bitmap_portable_size_in_bytes(r)`. + * + * This is meant to be compatible with other languages: + * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, + char *buf); +/** + * Check how many bytes would be read (up to maxbytes) at this pointer if there + * is a valid bitmap, returns zero if there is no valid bitmap. + * + * This is meant to be compatible with other languages + * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + */ +size_t roaring64_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes); + +/** + * Read a bitmap from a serialized buffer safely (reading up to maxbytes). + * In case of failure, NULL is returned. + * + * This is meant to be compatible with other languages + * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + * + * The function itself is safe in the sense that it will not cause buffer + * overflows. However, for correct operations, it is assumed that the bitmap + * read was once serialized from a valid bitmap (i.e., it follows the format + * specification). If you provided an incorrect input (garbage), then the bitmap + * read may not be in a valid state and following operations may not lead to + * sensible results. In particular, the serialized array containers need to be + * in sorted order, and the run containers should be in sorted non-overlapping + * order. This is is guaranteed to happen when serializing an existing bitmap, + * but not for random inputs. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, + size_t maxbytes); + +/** + * Iterate over the bitmap elements. The function `iterator` is called once for + * all the values with `ptr` (can be NULL) as the second parameter of each call. + * + * `roaring_iterator64` is simply a pointer to a function that returns a bool + * and takes `(uint64_t, void*)` as inputs. True means that the iteration should + * continue, while false means that it should stop. + * + * Returns true if the `roaring64_iterator` returned true throughout (so that + * all data points were necessarily visited). + * + * Iteration is ordered from the smallest to the largest elements. + */ +bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, + roaring_iterator64 iterator, void *ptr); + +/** + * Convert the bitmap to a sorted array `out`. + * + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * ``` + * out = malloc(roaring64_bitmap_get_cardinality(bitmap) * sizeof(uint64_t)); + * ``` + */ +void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, + uint64_t *out); + +/** + * Create an iterator object that can be used to iterate through the values. + * Caller is responsible for calling `roaring64_iterator_free()`. + * + * The iterator is initialized. If there is a value, then this iterator points + * to the first value and `roaring64_iterator_has_value()` returns true. The + * value can be retrieved with `roaring64_iterator_value()`. + */ +roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r); + +/** + * Create an iterator object that can be used to iterate through the values. + * Caller is responsible for calling `roaring64_iterator_free()`. + * + * The iterator is initialized. If there is a value, then this iterator points + * to the last value and `roaring64_iterator_has_value()` returns true. The + * value can be retrieved with `roaring64_iterator_value()`. + */ +roaring64_iterator_t *roaring64_iterator_create_last( + const roaring64_bitmap_t *r); + +/** + * Re-initializes an existing iterator. Functionally the same as + * `roaring64_iterator_create` without a allocation. + */ +void roaring64_iterator_reinit(const roaring64_bitmap_t *r, + roaring64_iterator_t *it); + +/** + * Re-initializes an existing iterator. Functionally the same as + * `roaring64_iterator_create_last` without a allocation. + */ +void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, + roaring64_iterator_t *it); + +/** + * Creates a copy of the iterator. Caller is responsible for calling + * `roaring64_iterator_free()` on the resulting iterator. + */ +roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it); + +/** + * Free the iterator. + */ +void roaring64_iterator_free(roaring64_iterator_t *it); + +/** + * Returns true if the iterator currently points to a value. If so, calling + * `roaring64_iterator_value()` returns the value. + */ +bool roaring64_iterator_has_value(const roaring64_iterator_t *it); + +/** + * Returns the value the iterator currently points to. Should only be called if + * `roaring64_iterator_has_value()` returns true. + */ +uint64_t roaring64_iterator_value(const roaring64_iterator_t *it); + +/** + * Advance the iterator. If there is a new value, then + * `roaring64_iterator_has_value()` returns true. Values are traversed in + * increasing order. For convenience, returns the result of + * `roaring64_iterator_has_value()`. + * + * Once this returns false, `roaring64_iterator_advance` should not be called on + * the iterator again. Calling `roaring64_iterator_previous` is allowed. + */ +bool roaring64_iterator_advance(roaring64_iterator_t *it); + +/** + * Decrement the iterator. If there is a new value, then + * `roaring64_iterator_has_value()` returns true. Values are traversed in + * decreasing order. For convenience, returns the result of + * `roaring64_iterator_has_value()`. + * + * Once this returns false, `roaring64_iterator_previous` should not be called + * on the iterator again. Calling `roaring64_iterator_advance` is allowed. + */ +bool roaring64_iterator_previous(roaring64_iterator_t *it); + +/** + * Move the iterator to the first value greater than or equal to `val`, if it + * exists at or after the current position of the iterator. If there is a new + * value, then `roaring64_iterator_has_value()` returns true. Values are + * traversed in increasing order. For convenience, returns the result of + * `roaring64_iterator_has_value()`. + */ +bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, + uint64_t val); + +/** + * Reads up to `count` values from the iterator into the given `buf`. Returns + * the number of elements read. The number of elements read can be smaller than + * `count`, which means that there are no more elements in the bitmap. + * + * This function can be used together with other iterator functions. + */ +uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, + uint64_t count); + +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace api +#endif + +#endif /* ROARING64_H */ +/* end file include/roaring/roaring64.h */ +#endif diff --git a/src/lib/third_party/include/roaring_v2.h b/src/lib/third_party/include/roaring_v2.h new file mode 100644 index 000000000..500ba9cb9 --- /dev/null +++ b/src/lib/third_party/include/roaring_v2.h @@ -0,0 +1,1143 @@ +// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! +// Created by amalgamation.sh on 2023-02-12T11:34:02Z + +/* + * The CRoaring project is under a dual license (Apache/MIT). + * Users of the library may choose one or the other license. + */ +/* + * Copyright 2016-2022 The CRoaring authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * MIT License + * + * Copyright 2016-2022 The CRoaring authors + * + * Permission is hereby granted, free of charge, to any + * person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the + * Software without restriction, including without + * limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software + * is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice + * shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +/* begin file include/roaring/roaring_version.h */ +// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand +#ifndef ROARING_INCLUDE_ROARING_VERSION +#define ROARING_INCLUDE_ROARING_VERSION +#define ROARING_VERSION "0.9.6" +enum { + ROARING_VERSION_MAJOR = 0, + ROARING_VERSION_MINOR = 9, + ROARING_VERSION_REVISION = 6 +}; +#endif // ROARING_INCLUDE_ROARING_VERSION +/* end file include/roaring/roaring_version.h */ +/* begin file include/roaring/roaring_types.h */ +/* + Typedefs used by various components +*/ + +#ifndef ROARING_TYPES_H +#define ROARING_TYPES_H + +#include <stdbool.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace api { +#endif + + +/** + * When building .c files as C++, there's added compile-time checking if the + * container types are derived from a `container_t` base class. So long as + * such a base class is empty, the struct will behave compatibly with C structs + * despite the derivation. This is due to the Empty Base Class Optimization: + * + * https://en.cppreference.com/w/cpp/language/ebo + * + * But since C isn't namespaced, taking `container_t` globally might collide + * with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal + * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` + */ +#if defined(__cplusplus) + extern "C++" { + struct container_s {}; + } + #define ROARING_CONTAINER_T ::roaring::api::container_s +#else + #define ROARING_CONTAINER_T void // no compile-time checking +#endif + +#define ROARING_FLAG_COW UINT8_C(0x1) +#define ROARING_FLAG_FROZEN UINT8_C(0x2) + +/** + * Roaring arrays are array-based key-value pairs having containers as values + * and 16-bit integer keys. A roaring bitmap might be implemented as such. + */ + +// parallel arrays. Element sizes quite different. +// Alternative is array +// of structs. Which would have better +// cache performance through binary searches? + +typedef struct roaring_array_s { + int32_t size; + int32_t allocation_size; + ROARING_CONTAINER_T **containers; // Use container_t in non-API files! + uint16_t *keys; + uint8_t *typecodes; + uint8_t flags; +} roaring_array_t; + + +typedef bool (*roaring_iterator)(uint32_t value, void *param); +typedef bool (*roaring_iterator64)(uint64_t value, void *param); + +/** +* (For advanced users.) +* The roaring_statistics_t can be used to collect detailed statistics about +* the composition of a roaring bitmap. +*/ +typedef struct roaring_statistics_s { + uint32_t n_containers; /* number of containers */ + + uint32_t n_array_containers; /* number of array containers */ + uint32_t n_run_containers; /* number of run containers */ + uint32_t n_bitset_containers; /* number of bitmap containers */ + + uint32_t + n_values_array_containers; /* number of values in array containers */ + uint32_t n_values_run_containers; /* number of values in run containers */ + uint32_t + n_values_bitset_containers; /* number of values in bitmap containers */ + + uint32_t n_bytes_array_containers; /* number of allocated bytes in array + containers */ + uint32_t n_bytes_run_containers; /* number of allocated bytes in run + containers */ + uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap + containers */ + + uint32_t + max_value; /* the maximal value, undefined if cardinality is zero */ + uint32_t + min_value; /* the minimal value, undefined if cardinality is zero */ + uint64_t sum_value; /* the sum of all values (could be used to compute + average) */ + + uint64_t cardinality; /* total number of values stored in the bitmap */ + + // and n_values_arrays, n_values_rle, n_values_bitmap +} roaring_statistics_t; + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif + +#endif /* ROARING_TYPES_H */ +/* end file include/roaring/roaring_types.h */ +/* begin file include/roaring/roaring.h */ +/* + * An implementation of Roaring Bitmaps in C. + */ + +#ifndef ROARING_H +#define ROARING_H + +#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> // for `size_t` + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace api { +#endif + +typedef struct roaring_bitmap_s { + roaring_array_t high_low_container; +} roaring_bitmap_t; + +/** + * Dynamically allocates a new bitmap (initially empty). + * Returns NULL if the allocation fails. + * Capacity is a performance hint for how many "containers" the data will need. + * Client is responsible for calling `roaring_bitmap_free()`. + */ +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); + +/** + * Dynamically allocates a new bitmap (initially empty). + * Returns NULL if the allocation fails. + * Client is responsible for calling `roaring_bitmap_free()`. + */ +static inline roaring_bitmap_t *roaring_bitmap_create(void) + { return roaring_bitmap_create_with_capacity(0); } + +/** + * Initialize a roaring bitmap structure in memory controlled by client. + * Capacity is a performance hint for how many "containers" the data will need. + * Can return false if auxiliary allocations fail when capacity greater than 0. + */ +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); + +/** + * Initialize a roaring bitmap structure in memory controlled by client. + * The bitmap will be in a "clear" state, with no auxiliary allocations. + * Since this performs no allocations, the function will not fail. + */ +static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) + { roaring_bitmap_init_with_capacity(r, 0); } + +/** + * Add all the values between min (included) and max (excluded) that are at a + * distance k*step from min. +*/ +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step); + +/** + * Creates a new bitmap from a pointer of uint32_t integers + */ +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); + +/* + * Whether you want to use copy-on-write. + * Saves memory and avoids copies, but needs more care in a threaded context. + * Most users should ignore this flag. + * + * Note: If you do turn this flag to 'true', enabling COW, then ensure that you + * do so for all of your bitmaps, since interactions between bitmaps with and + * without COW is unsafe. + */ +static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) { + return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, + bool cow) { + if (cow) { + r->high_low_container.flags |= ROARING_FLAG_COW; + } else { + r->high_low_container.flags &= ~ROARING_FLAG_COW; + } +} + +roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset); + +#ifdef NDPI_ENABLE_DEBUG_MESSAGES +/** + * Describe the inner structure of the bitmap. + */ +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); +#endif + +/** + * Creates a new bitmap from a list of uint32_t integers + */ +roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); + +/** + * Copies a bitmap (this does memory allocation). + * The caller is responsible for memory management. + */ +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); + +/** + * Copies a bitmap from src to dest. It is assumed that the pointer dest + * is to an already allocated bitmap. The content of the dest bitmap is + * freed/deleted. + * + * It might be preferable and simpler to call roaring_bitmap_copy except + * that roaring_bitmap_overwrite can save on memory allocations. + */ +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, + const roaring_bitmap_t *src); + +#ifdef NDPI_ENABLE_DEBUG_MESSAGES +/** + * Print the content of the bitmap. + */ +void roaring_bitmap_printf(const roaring_bitmap_t *r); +#endif + +/** + * Computes the intersection between two bitmaps and returns new bitmap. The + * caller is responsible for memory management. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * You may also rely on roaring_bitmap_and_inplace to avoid creating + * many temporary bitmaps. + */ +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Computes the size of the intersection between two bitmaps. + */ +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Check whether two bitmaps intersect. + */ +bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Check whether a bitmap and a closed range intersect. + */ +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, + uint64_t x, uint64_t y); + +/** + * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto + * distance, or the Jaccard similarity coefficient) + * + * The Jaccard index is undefined if both bitmaps are empty. + */ +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Computes the size of the union between two bitmaps. + */ +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Computes the size of the difference (andnot) between two bitmaps. + */ +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Computes the size of the symmetric difference (xor) between two bitmaps. + */ +uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Inplace version of `roaring_bitmap_and()`, modifies r1 + * r1 == r2 is allowed. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + */ +void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Computes the union between two bitmaps and returns new bitmap. The caller is + * responsible for memory management. + */ +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Inplace version of `roaring_bitmap_or(), modifies r1. + * TODO: decide whether r1 == r2 ok + */ +void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Compute the union of 'number' bitmaps. + * Caller is responsible for freeing the result. + * See also `roaring_bitmap_or_many_heap()` + */ +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, + const roaring_bitmap_t **rs); + +/** + * Compute the union of 'number' bitmaps using a heap. This can sometimes be + * faster than `roaring_bitmap_or_many() which uses a naive algorithm. + * Caller is responsible for freeing the result. + */ +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, + const roaring_bitmap_t **rs); + +/** + * Computes the symmetric difference (xor) between two bitmaps + * and returns new bitmap. The caller is responsible for memory management. + */ +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2. + */ +void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Compute the xor of 'number' bitmaps. + * Caller is responsible for freeing the result. + */ +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, + const roaring_bitmap_t **rs); + +/** + * Computes the difference (andnot) between two bitmaps and returns new bitmap. + * Caller is responsible for freeing the result. + */ +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2. + */ +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * TODO: consider implementing: + * + * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be + * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is + * responsible for freeing the result."" + * + * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, + * const roaring_bitmap_t **rs); + */ + +/** + * Frees the memory. + */ +void roaring_bitmap_free(const roaring_bitmap_t *r); + +/** + * A bit of context usable with `roaring_bitmap_*_bulk()` functions + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap + * (unless re-initialized to zero), and any modification to a bitmap + * (other than modifications performed with `_bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. + */ +typedef struct roaring_bulk_context_s { + ROARING_CONTAINER_T *container; + int idx; + uint16_t key; + uint8_t typecode; +} roaring_bulk_context_t; + +/** + * Add an item, using context from a previous insert for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val); + +/** + * Add value n_args from pointer vals, faster than repeatedly calling + * `roaring_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same "key" (high 16 bits of the value) as consecutive + * elements in `vals` + */ +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals); + +/** + * Add value x + */ +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); + +/** + * Add value x + * Returns true if a new value was added, false if the value already existed. + */ +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); + +/** + * Add all values in range [min, max] + */ +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, + uint32_t min, uint32_t max); + +/** + * Add all values in range [min, max) + */ +static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max) { + if(max == min) return; + roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); +} + +/** + * Remove value x + */ +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); + +/** + * Remove all values in range [min, max] + */ +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, + uint32_t min, uint32_t max); + +/** + * Remove all values in range [min, max) + */ +static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max) { + if(max == min) return; + roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); +} + +/** + * Remove multiple values + */ +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals); + +/** + * Remove value x + * Returns true if a new value was removed, false if the value was not existing. + */ +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); + +/** + * Check if value is present + */ +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); + +/** + * Check whether a range of values from range_start (included) + * to range_end (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end); + +/** + * Check if an items is present, using context from a previous insert for speed + * optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val); + +/** + * Get the cardinality of the bitmap (number of elements). + */ +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); + +/** + * Returns the number of elements in the range [range_start, range_end). + */ +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end); + +/** +* Returns true if the bitmap is empty (cardinality is zero). +*/ +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); + + +/** + * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap + * was initialized in client memory via roaring_bitmap_init(), then a call to + * roaring_bitmap_clear() would be enough to "free" it) + */ +void roaring_bitmap_clear(roaring_bitmap_t *r); + +/** + * Convert the bitmap to a sorted array, output in `ans`. + * + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * + * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t)); + */ +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); + + +/** + * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`. + * + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * + * ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t)); + * + * Return false in case of failure (e.g., insufficient memory) + */ +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, + size_t offset, size_t limit, + uint32_t *ans); + +/** + * Remove run-length encoding even when it is more space efficient. + * Return whether a change was applied. + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); + +/** + * Convert array and bitmap containers to run containers when it is more + * efficient; also convert from run containers when more space efficient. + * + * Returns true if the result has at least one run container. + * Additional savings might be possible by calling `shrinkToFit()`. + */ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); + +/** + * If needed, reallocate memory to shrink the memory usage. + * Returns the number of bytes saved. + */ +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); + +/** + * Write the bitmap to an output pointer, this output buffer should refer to + * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes. + * + * See `roaring_bitmap_portable_serialize()` if you want a format that's + * compatible with Java and Go implementations. This format can sometimes be + * more space efficient than the portable form, e.g. when the data is sparse. + * + * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); + +/** + * Use with `roaring_bitmap_serialize()`. + * + * (See `roaring_bitmap_portable_deserialize()` if you want a format that's + * compatible with Java and Go implementations). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); + +/** + * How many bytes are required to serialize this bitmap (NOT compatible + * with Java and Go versions) + */ +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); + +/** + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. + * + * This function is unsafe in the sense that if there is no valid serialized + * bitmap at the pointer, then many bytes could be read, possibly causing a + * buffer overflow. See also roaring_bitmap_portable_deserialize_safe(). + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec +* + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); + +/** + * Read bitmap from a serialized buffer safely (reading up to maxbytes). + * In case of failure, NULL is returned. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, + size_t maxbytes); + +/** + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * The function is unsafe in the following ways: + * 1) It may execute unaligned memory accesses. + * 2) A buffer overflow may occur if buf does not point to a valid serialized + * bitmap. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); + +/** + * Check how many bytes would be read (up to maxbytes) at this pointer if there + * is a bitmap, returns zero if there is no valid bitmap. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +size_t roaring_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes); + +/** + * How many bytes are required to serialize this bitmap. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); + +/** + * Write a bitmap to a char buffer. The output buffer should refer to at least + * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. + * + * Returns how many bytes were written which should match + * `roaring_bitmap_portable_size_in_bytes(r)`. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); + +/* + * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. + * Deserialized bitmap is a constant view of the underlying buffer. + * This significantly reduces amount of allocations and copying required during + * deserialization. + * It can be used with memory mapped files. + * Example can be found in benchmarks/frozen_benchmark.c + * + * [#####] const roaring_bitmap_t * + * | | | + * +----+ | +-+ + * | | | + * [#####################################] underlying buffer + * + * Note that because frozen serialization format imitates C memory layout + * of roaring_bitmap_t, it is not fixed. It is different on big/little endian + * platforms and can be changed in future. + */ + +/** + * Returns number of bytes required to serialize bitmap using frozen format. + */ +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); + +/** + * Serializes bitmap using frozen format. + * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); + +/** + * Creates constant bitmap that is a view of a given buffer. + * Buffer data should have been written by `roaring_bitmap_frozen_serialize()` + * Its beginning must also be aligned by 32 bytes. + * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`. + * In case of failure, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, + size_t length); + +/** + * Iterate over the bitmap elements. The function iterator is called once for + * all the values with ptr (can be NULL) as the second parameter of each call. + * + * `roaring_iterator` is simply a pointer to a function that returns bool + * (true means that the iteration should continue while false means that it + * should stop), and takes (uint32_t,void*) as inputs. + * + * Returns true if the roaring_iterator returned true throughout (so that all + * data points were necessarily visited). + * + * Iteration is ordered: from the smallest to the largest elements. + */ +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, + void *ptr); + +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, + uint64_t high_bits, void *ptr); + +/** + * Return true if the two bitmaps contain the same elements. + */ +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2. + */ +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2, and r2 is strictly + * greater than r1. + */ +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * (For expert users who seek high performance.) + * + * Computes the union between two bitmaps and returns new bitmap. The caller is + * responsible for memory management. + * + * The lazy version defers some computations such as the maintenance of the + * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` + * after executing "lazy" computations. + * + * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. + * + * `bitsetconversion` is a flag which determines whether container-container + * operations force a bitset conversion. + */ +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2, + const bool bitsetconversion); + +/** + * (For expert users who seek high performance.) + * + * Inplace version of roaring_bitmap_lazy_or, modifies r1. + * + * `bitsetconversion` is a flag which determines whether container-container + * operations force a bitset conversion. + */ +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2, + const bool bitsetconversion); + +/** + * (For expert users who seek high performance.) + * + * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()` + * or modified with `roaring_bitmap_lazy_or_inplace()`. + */ +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); + +/** + * Computes the symmetric difference between two bitmaps and returns new bitmap. + * The caller is responsible for memory management. + * + * The lazy version defers some computations such as the maintenance of the + * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` + * after executing "lazy" computations. + * + * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on + * the result. + */ +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * (For expert users who seek high performance.) + * + * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2 + */ +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Compute the negation of the bitmap in the interval [range_start, range_end). + * The number of negated values is range_end - range_start. + * Areas outside the range are passed through unchanged. + */ +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, + uint64_t range_start, uint64_t range_end); + +/** + * compute (in place) the negation of the roaring bitmap within a specified + * interval: [range_start, range_end). The number of negated values is + * range_end - range_start. + * Areas outside the range are passed through unchanged. + */ +void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, + uint64_t range_end); + +/** + * Selects the element at index 'rank' where the smallest element is at index 0. + * If the size of the roaring bitmap is strictly greater than rank, then this + * function returns true and sets element to the element of given rank. + * Otherwise, it returns false. + */ +bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, + uint32_t *element); + +/** + * roaring_bitmap_rank returns the number of integers that are smaller or equal + * to x. Thus if x is the first element, this function will return 1. If + * x is smaller than the smallest element, this function will return 0. + * + * The indexing convention differs between roaring_bitmap_select and + * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value + * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking + * the smallest value. + */ +uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); + +/** + * Returns the smallest value in the set, or UINT32_MAX if the set is empty. + */ +uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); + +/** + * Returns the greatest value in the set, or 0 if the set is empty. + */ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); + +/** + * (For advanced users.) + * + * Collect statistics about the bitmap, see roaring_types.h for + * a description of roaring_statistics_t + */ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, + roaring_statistics_t *stat); + +/********************* +* What follows is code use to iterate through values in a roaring bitmap + +roaring_bitmap_t *r =... +roaring_uint32_iterator_t i; +roaring_create_iterator(r, &i); +while(i.has_value) { + printf("value = %d\n", i.current_value); + roaring_advance_uint32_iterator(&i); +} + +Obviously, if you modify the underlying bitmap, the iterator +becomes invalid. So don't. +*/ + +typedef struct roaring_uint32_iterator_s { + const roaring_bitmap_t *parent; // owner + int32_t container_index; // point to the current container index + int32_t in_container_index; // for bitset and array container, this is out + // index + int32_t run_index; // for run container, this points at the run + + uint32_t current_value; + bool has_value; + + const ROARING_CONTAINER_T + *container; // should be: + // parent->high_low_container.containers[container_index]; + uint8_t typecode; // should be: + // parent->high_low_container.typecodes[container_index]; + uint32_t highbits; // should be: + // parent->high_low_container.keys[container_index]) << + // 16; + +} roaring_uint32_iterator_t; + +/** + * Initialize an iterator object that can be used to iterate through the + * values. If there is a value, then this iterator points to the first value + * and `it->has_value` is true. The value is in `it->current_value`. + */ +void roaring_init_iterator(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit); + +/** + * Initialize an iterator object that can be used to iterate through the + * values. If there is a value, then this iterator points to the last value + * and `it->has_value` is true. The value is in `it->current_value`. + */ +void roaring_init_iterator_last(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit); + +/** + * Create an iterator object that can be used to iterate through the values. + * Caller is responsible for calling `roaring_free_iterator()`. + * + * The iterator is initialized (this function calls `roaring_init_iterator()`) + * If there is a value, then this iterator points to the first value and + * `it->has_value` is true. The value is in `it->current_value`. + */ +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); + +/** +* Advance the iterator. If there is a new value, then `it->has_value` is true. +* The new value is in `it->current_value`. Values are traversed in increasing +* orders. For convenience, returns `it->has_value`. +*/ +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); + +/** +* Decrement the iterator. If there's a new value, then `it->has_value` is true. +* The new value is in `it->current_value`. Values are traversed in decreasing +* order. For convenience, returns `it->has_value`. +*/ +bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); + +/** + * Move the iterator to the first value >= `val`. If there is a such a value, + * then `it->has_value` is true. The new value is in `it->current_value`. + * For convenience, returns `it->has_value`. + */ +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val); + +/** + * Creates a copy of an iterator. + * Caller must free it. + */ +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( + const roaring_uint32_iterator_t *it); + +/** + * Free memory following `roaring_create_iterator()` + */ +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); + +/* + * Reads next ${count} values from iterator into user-supplied ${buf}. + * Returns the number of read elements. + * This number can be smaller than ${count}, which means that iterator is drained. + * + * This function satisfies semantics of iteration and can be used together with + * other iterator functions. + * - first value is copied from ${it}->current_value + * - after function returns, iterator is positioned at the next element + */ +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, + uint32_t* buf, uint32_t count); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif + +#endif /* ROARING_H */ + +#ifdef __cplusplus + /** + * Best practices for C++ headers is to avoid polluting global scope. + * But for C compatibility when just `roaring.h` is included building as + * C++, default to global access for the C public API. + * + * BUT when `roaring.hh` is included instead, it sets this flag. That way + * explicit namespacing must be used to get the C functions. + * + * This is outside the include guard so that if you include BOTH headers, + * the order won't matter; you still get the global definitions. + */ + #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) + using namespace ::roaring::api; + #endif +#endif +/* end file include/roaring/roaring.h */ +/* begin file include/roaring/memory.h */ +#ifndef INCLUDE_ROARING_MEMORY_H_ +#define INCLUDE_ROARING_MEMORY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> // for size_t + +typedef void* (*roaring_malloc_p)(size_t); +typedef void* (*roaring_realloc_p)(void*, size_t); +typedef void* (*roaring_calloc_p)(size_t, size_t); +typedef void (*roaring_free_p)(void*); +typedef void* (*roaring_aligned_malloc_p)(size_t, size_t); +typedef void (*roaring_aligned_free_p)(void*); + +typedef struct roaring_memory_s { + roaring_malloc_p malloc; + roaring_realloc_p realloc; + roaring_calloc_p calloc; + roaring_free_p free; + roaring_aligned_malloc_p aligned_malloc; + roaring_aligned_free_p aligned_free; +} roaring_memory_t; + +void roaring_init_memory_hook(roaring_memory_t memory_hook); + +void* roaring_malloc(size_t); +void* roaring_realloc(void*, size_t); +void* roaring_calloc(size_t, size_t); +void roaring_free(void*); +void* roaring_aligned_malloc(size_t, size_t); +void roaring_aligned_free(void*); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_ROARING_MEMORY_H_ +/* end file include/roaring/memory.h */ |