| author | Luca Deri <deri@ntop.org> | 2023-02-12 12:50:05 +0100 |
|---|---|---|
| committer | Luca Deri <deri@ntop.org> | 2023-02-12 12:50:24 +0100 |
| commit | bf413afba1b79685caf1ccade5f984c2d6e92e3c (patch) | |
| tree | 50d46f7a5e68496dcd8734ada5296b6b47a4160f | |
| parent | ba4e145aad4c7dbd1cbc6d2a6557f3686447d96a (diff) | |
Update roaring bitmap code
-rw-r--r-- | src/lib/ndpi_bitmap.c | 1
-rw-r--r-- | src/lib/third_party/include/roaring.h | 379
-rw-r--r-- | src/lib/third_party/src/roaring.c (renamed from src/lib/third_party/src/roaring.cc) | 17848

3 files changed, 9442 insertions, 8786 deletions
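With this update the CRoaring amalgamation is refreshed (version 0.3.4 to 0.9.6) and compiled as its own translation unit (`src/lib/third_party/src/roaring.c`) instead of being `#include`-d into `ndpi_bitmap.c`, so the API declarations in `roaring.h` drop their `static` qualifier. As an illustration only (not part of the commit), here is a minimal sketch of that C API as declared in the updated header; the include path assumes nDPI's vendored layout shown above.

```c
/* Minimal sketch, not from the commit: basic use of the CRoaring C API
 * declared in the updated roaring.h. Include path assumes nDPI's layout. */
#include <stdio.h>
#include <stdint.h>
#include "third_party/include/roaring.h"

int main(void) {
  roaring_bitmap_t *r = roaring_bitmap_create();
  uint32_t vals[] = { 1, 2, 3, 1000, 100000 };

  roaring_bitmap_add_many(r, 5, vals);   /* bulk insert from an array */
  roaring_bitmap_add(r, 7);              /* single insert */
  roaring_bitmap_run_optimize(r);        /* use run containers where smaller */

  printf("cardinality=%llu contains(1000)=%d\n",
         (unsigned long long)roaring_bitmap_get_cardinality(r),
         roaring_bitmap_contains(r, 1000));

  roaring_bitmap_free(r);                /* caller owns the bitmap */
  return 0;
}
```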
diff --git a/src/lib/ndpi_bitmap.c b/src/lib/ndpi_bitmap.c index 499b1342a..cf23b3f34 100644 --- a/src/lib/ndpi_bitmap.c +++ b/src/lib/ndpi_bitmap.c @@ -36,7 +36,6 @@ #include "ndpi_encryption.h" #include "third_party/include/roaring.h" -#include "third_party/src/roaring.cc" /* ******************************************* */ diff --git a/src/lib/third_party/include/roaring.h b/src/lib/third_party/include/roaring.h index 2d5bb856f..117f861b4 100644 --- a/src/lib/third_party/include/roaring.h +++ b/src/lib/third_party/include/roaring.h @@ -1,8 +1,12 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on Mer 25 Ago 2021 04:24:41 CEST +// Created by amalgamation.sh on 2023-02-12T11:34:02Z /* - * Copyright 2016-2020 The CRoaring authors + * The CRoaring project is under a dual license (Apache/MIT). + * Users of the library may choose one or the other license. + */ +/* + * Copyright 2016-2022 The CRoaring authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,16 +22,47 @@ * * SPDX-License-Identifier: Apache-2.0 */ +/* + * MIT License + * + * Copyright 2016-2022 The CRoaring authors + * + * Permission is hereby granted, free of charge, to any + * person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the + * Software without restriction, including without + * limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software + * is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice + * shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ /* begin file include/roaring/roaring_version.h */ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION = 0.3.4, +#define ROARING_VERSION "0.9.6" enum { ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 3, - ROARING_VERSION_REVISION = 4 + ROARING_VERSION_MINOR = 9, + ROARING_VERSION_REVISION = 6 }; #endif // ROARING_INCLUDE_ROARING_VERSION /* end file include/roaring/roaring_version.h */ @@ -68,12 +103,6 @@ extern "C" { namespace roaring { namespace api { #define ROARING_CONTAINER_T void // no compile-time checking #endif - -#define MAX_CONTAINERS 65536 - -#define SERIALIZATION_ARRAY_UINT32 1 -#define SERIALIZATION_CONTAINER 2 - #define ROARING_FLAG_COW UINT8_C(0x1) #define ROARING_FLAG_FROZEN UINT8_C(0x2) @@ -170,7 +199,7 @@ typedef struct roaring_bitmap_s { * Capacity is a performance hint for how many "containers" the data will need. * Client is responsible for calling `roaring_bitmap_free()`. 
*/ -static roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); /** * Dynamically allocates a new bitmap (initially empty). @@ -185,7 +214,7 @@ static inline roaring_bitmap_t *roaring_bitmap_create(void) * Capacity is a performance hint for how many "containers" the data will need. * Can return false if auxiliary allocations fail when capacity greater than 0. */ -static bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); /** * Initialize a roaring bitmap structure in memory controlled by client. @@ -199,13 +228,13 @@ static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) * Add all the values between min (included) and max (excluded) that are at a * distance k*step from min. */ -static roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); /** * Creates a new bitmap from a pointer of uint32_t integers */ -static roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); /* * Whether you want to use copy-on-write. @@ -228,21 +257,23 @@ static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, } } +roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset); /** * Describe the inner structure of the bitmap. */ -static void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); /** * Creates a new bitmap from a list of uint32_t integers */ -static roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); +roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); /** * Copies a bitmap (this does memory allocation). * The caller is responsible for memory management. */ -static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); /** * Copies a bitmap from src to dest. It is assumed that the pointer dest @@ -252,37 +283,42 @@ static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); * It might be preferable and simpler to call roaring_bitmap_copy except * that roaring_bitmap_overwrite can save on memory allocations. */ -static bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, const roaring_bitmap_t *src); /** * Print the content of the bitmap. */ -static void roaring_bitmap_printf(const roaring_bitmap_t *r); +void roaring_bitmap_printf(const roaring_bitmap_t *r); /** * Computes the intersection between two bitmaps and returns new bitmap. The * caller is responsible for memory management. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * You may also rely on roaring_bitmap_and_inplace to avoid creating + * many temporary bitmaps. */ -static roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the intersection between two bitmaps. 
*/ -static uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Check whether two bitmaps intersect. */ -static bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, +bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Check whether a bitmap and a closed range intersect. */ -static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, uint64_t y); /** @@ -291,46 +327,49 @@ static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, * * The Jaccard index is undefined if both bitmaps are empty. */ -static double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the union between two bitmaps. */ -static uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the difference (andnot) between two bitmaps. */ -static uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the symmetric difference (xor) between two bitmaps. */ -static uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, +uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of `roaring_bitmap_and()`, modifies r1 - * r1 == r2 is allowed + * r1 == r2 is allowed. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ -static void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. */ -static roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of `roaring_bitmap_or(), modifies r1. * TODO: decide whether r1 == r2 ok */ -static void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** @@ -338,7 +377,7 @@ static void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, * Caller is responsible for freeing the result. * See also `roaring_bitmap_or_many_heap()` */ -static roaring_bitmap_t *roaring_bitmap_or_many(size_t number, +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **rs); /** @@ -346,40 +385,40 @@ static roaring_bitmap_t *roaring_bitmap_or_many(size_t number, * faster than `roaring_bitmap_or_many() which uses a naive algorithm. * Caller is responsible for freeing the result. */ -static roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, const roaring_bitmap_t **rs); /** * Computes the symmetric difference (xor) between two bitmaps * and returns new bitmap. The caller is responsible for memory management. 
*/ -static roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2. */ -static void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Compute the xor of 'number' bitmaps. * Caller is responsible for freeing the result. */ -static roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, const roaring_bitmap_t **rs); /** * Computes the difference (andnot) between two bitmaps and returns new bitmap. * Caller is responsible for freeing the result. */ -static roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2. */ -static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** @@ -396,30 +435,69 @@ static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, /** * Frees the memory. */ -static void roaring_bitmap_free(const roaring_bitmap_t *r); +void roaring_bitmap_free(const roaring_bitmap_t *r); + +/** + * A bit of context usable with `roaring_bitmap_*_bulk()` functions + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap + * (unless re-initialized to zero), and any modification to a bitmap + * (other than modifications performed with `_bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. + */ +typedef struct roaring_bulk_context_s { + ROARING_CONTAINER_T *container; + int idx; + uint16_t key; + uint8_t typecode; +} roaring_bulk_context_t; + +/** + * Add an item, using context from a previous insert for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val); /** * Add value n_args from pointer vals, faster than repeatedly calling * `roaring_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same "key" (high 16 bits of the value) as consecutive + * elements in `vals` */ -static void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); /** * Add value x */ -static void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); /** * Add value x * Returns true if a new value was added, false if the value already existed. 
*/ -static bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); /** * Add all values in range [min, max] */ -static void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max); /** @@ -434,12 +512,12 @@ static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, /** * Remove value x */ -static void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); /** * Remove all values in range [min, max] */ -static void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max); /** @@ -454,44 +532,63 @@ static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, /** * Remove multiple values */ -static void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); /** * Remove value x * Returns true if a new value was removed, false if the value was not existing. */ -static bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); /** * Check if value is present */ -static bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); /** * Check whether a range of values from range_start (included) * to range_end (excluded) is present */ -static bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); /** + * Check if an items is present, using context from a previous insert for speed + * optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val); + +/** * Get the cardinality of the bitmap (number of elements). */ -static uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); /** * Returns the number of elements in the range [range_start, range_end). */ -static uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). 
*/ -static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); /** @@ -499,20 +596,20 @@ static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); * was initialized in client memory via roaring_bitmap_init(), then a call to * roaring_bitmap_clear() would be enough to "free" it) */ -static void roaring_bitmap_clear(roaring_bitmap_t *r); +void roaring_bitmap_clear(roaring_bitmap_t *r); /** - * Convert the bitmap to an array, output in `ans`, + * Convert the bitmap to a sorted array, output in `ans`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t)); */ -static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); /** - * Convert the bitmap to an array from `offset` by `limit`, output in `ans`. + * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * @@ -520,7 +617,7 @@ static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t * * * Return false in case of failure (e.g., insufficient memory) */ -static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, size_t limit, uint32_t *ans); @@ -528,7 +625,7 @@ static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, * Remove run-length encoding even when it is more space efficient. * Return whether a change was applied. */ -static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); /** * Convert array and bitmap containers to run containers when it is more @@ -537,13 +634,13 @@ static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); * Returns true if the result has at least one run container. * Additional savings might be possible by calling `shrinkToFit()`. */ -static bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); /** * If needed, reallocate memory to shrink the memory usage. * Returns the number of bytes saved. */ -static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); /** * Write the bitmap to an output pointer, this output buffer should refer to @@ -554,22 +651,28 @@ static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * more space efficient than the portable form, e.g. when the data is sparse. * * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); /** * Use with `roaring_bitmap_serialize()`. * * (See `roaring_bitmap_portable_deserialize()` if you want a format that's - * compatible with Java and Go implementations) + * compatible with Java and Go implementations). + * + * This function is endian-sensitive. 
If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); /** * How many bytes are required to serialize this bitmap (NOT compatible * with Java and Go versions) */ -static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); /** * Read bitmap from a serialized buffer. @@ -581,8 +684,11 @@ static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec +* + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); /** * Read bitmap from a serialized buffer safely (reading up to maxbytes). @@ -590,18 +696,42 @@ static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); /** + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * The function is unsafe in the following ways: + * 1) It may execute unaligned memory accesses. + * 2) A buffer overflow may occur if buf does not point to a valid serialized + * bitmap. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); + +/** * Check how many bytes would be read (up to maxbytes) at this pointer if there * is a bitmap, returns zero if there is no valid bitmap. 
* * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ -static size_t roaring_bitmap_portable_deserialize_size(const char *buf, +size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** @@ -610,7 +740,7 @@ static size_t roaring_bitmap_portable_deserialize_size(const char *buf, * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ -static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); /** * Write a bitmap to a char buffer. The output buffer should refer to at least @@ -621,8 +751,11 @@ static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); /* * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. @@ -646,13 +779,16 @@ static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char /** * Returns number of bytes required to serialize bitmap using frozen format. */ -static size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); /** * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); /** * Creates constant bitmap that is a view of a given buffer. @@ -664,8 +800,11 @@ static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf * Bitmap returned by this function can be used in all readonly contexts. * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -static const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); /** @@ -681,29 +820,29 @@ static const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, * * Iteration is ordered: from the smallest to the largest elements. 
*/ -static bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, void *ptr); -static bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); /** * Return true if the two bitmaps contain the same elements. */ -static bool roaring_bitmap_equals(const roaring_bitmap_t *r1, +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2. */ -static bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2, and r2 is strictly * greater than r1. */ -static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** @@ -721,7 +860,7 @@ static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. */ -static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2, const bool bitsetconversion); @@ -733,7 +872,7 @@ static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. */ -static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2, const bool bitsetconversion); @@ -743,7 +882,7 @@ static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()` * or modified with `roaring_bitmap_lazy_or_inplace()`. */ -static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); /** * Computes the symmetric difference between two bitmaps and returns new bitmap. @@ -756,7 +895,7 @@ static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on * the result. */ -static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** @@ -764,7 +903,7 @@ static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, * * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2 */ -static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** @@ -772,7 +911,7 @@ static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, * The number of negated values is range_end - range_start. * Areas outside the range are passed through unchanged. */ -static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** @@ -781,7 +920,7 @@ static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, * range_end - range_start. 
* Areas outside the range are passed through unchanged. */ -static void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, +void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** @@ -790,7 +929,7 @@ static void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_sta * function returns true and sets element to the element of given rank. * Otherwise, it returns false. */ -static bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, +bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint32_t *element); /** @@ -803,17 +942,17 @@ static bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking * the smallest value. */ -static uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); +uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); /** * Returns the smallest value in the set, or UINT32_MAX if the set is empty. */ -static uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); +uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); /** * Returns the greatest value in the set, or 0 if the set is empty. */ -static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); /** * (For advanced users.) @@ -821,7 +960,7 @@ static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); * Collect statistics about the bitmap, see roaring_types.h for * a description of roaring_statistics_t */ -static void roaring_bitmap_statistics(const roaring_bitmap_t *r, +void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat); /********************* @@ -865,7 +1004,7 @@ typedef struct roaring_uint32_iterator_s { * values. If there is a value, then this iterator points to the first value * and `it->has_value` is true. The value is in `it->current_value`. */ -static void roaring_init_iterator(const roaring_bitmap_t *r, +void roaring_init_iterator(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** @@ -873,7 +1012,7 @@ static void roaring_init_iterator(const roaring_bitmap_t *r, * values. If there is a value, then this iterator points to the last value * and `it->has_value` is true. The value is in `it->current_value`. */ -static void roaring_init_iterator_last(const roaring_bitmap_t *r, +void roaring_init_iterator_last(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** @@ -884,41 +1023,41 @@ static void roaring_init_iterator_last(const roaring_bitmap_t *r, * If there is a value, then this iterator points to the first value and * `it->has_value` is true. The value is in `it->current_value`. */ -static roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); /** * Advance the iterator. If there is a new value, then `it->has_value` is true. * The new value is in `it->current_value`. Values are traversed in increasing * orders. For convenience, returns `it->has_value`. */ -static bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); /** * Decrement the iterator. If there's a new value, then `it->has_value` is true. * The new value is in `it->current_value`. Values are traversed in decreasing * order. For convenience, returns `it->has_value`. 
*/ -static bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); +bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); /** * Move the iterator to the first value >= `val`. If there is a such a value, * then `it->has_value` is true. The new value is in `it->current_value`. * For convenience, returns `it->has_value`. */ -static bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val); /** * Creates a copy of an iterator. * Caller must free it. */ -static roaring_uint32_iterator_t *roaring_copy_uint32_iterator( +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( const roaring_uint32_iterator_t *it); /** * Free memory following `roaring_create_iterator()` */ -static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); /* * Reads next ${count} values from iterator into user-supplied ${buf}. @@ -930,7 +1069,7 @@ static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); * - first value is copied from ${it}->current_value * - after function returns, iterator is positioned at the next element */ -static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count); #ifdef __cplusplus @@ -955,5 +1094,45 @@ static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, using namespace ::roaring::api; #endif #endif - /* end file include/roaring/roaring.h */ +/* begin file include/roaring/memory.h */ +#ifndef INCLUDE_ROARING_MEMORY_H_ +#define INCLUDE_ROARING_MEMORY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> // for size_t + +typedef void* (*roaring_malloc_p)(size_t); +typedef void* (*roaring_realloc_p)(void*, size_t); +typedef void* (*roaring_calloc_p)(size_t, size_t); +typedef void (*roaring_free_p)(void*); +typedef void* (*roaring_aligned_malloc_p)(size_t, size_t); +typedef void (*roaring_aligned_free_p)(void*); + +typedef struct roaring_memory_s { + roaring_malloc_p malloc; + roaring_realloc_p realloc; + roaring_calloc_p calloc; + roaring_free_p free; + roaring_aligned_malloc_p aligned_malloc; + roaring_aligned_free_p aligned_free; +} roaring_memory_t; + +void roaring_init_memory_hook(roaring_memory_t memory_hook); + +void* roaring_malloc(size_t); +void* roaring_realloc(void*, size_t); +void* roaring_calloc(size_t, size_t); +void roaring_free(void*); +void* roaring_aligned_malloc(size_t, size_t); +void roaring_aligned_free(void*); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_ROARING_MEMORY_H_ +/* end file include/roaring/memory.h */ diff --git a/src/lib/third_party/src/roaring.cc b/src/lib/third_party/src/roaring.c index 778d36004..58c1ea78c 100644 --- a/src/lib/third_party/src/roaring.cc +++ b/src/lib/third_party/src/roaring.c @@ -1,8 +1,12 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on Mer 25 Ago 2021 04:24:41 CEST +// Created by amalgamation.sh on 2023-02-12T11:34:02Z /* - * Copyright 2016-2020 The CRoaring authors + * The CRoaring project is under a dual license (Apache/MIT). + * Users of the library may choose one or the other license. + */ +/* + * Copyright 2016-2022 The CRoaring authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +22,37 @@ * * SPDX-License-Identifier: Apache-2.0 */ +/* + * MIT License + * + * Copyright 2016-2022 The CRoaring authors + * + * Permission is hereby granted, free of charge, to any + * person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the + * Software without restriction, including without + * limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software + * is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice + * shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ #include "roaring.h" @@ -27,262 +62,53 @@ #endif #include "roaring.h" /* include public API definitions */ -/* begin file include/roaring/isadetection.h */ -/* From -https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h -Highly modified. - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, -Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute -(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, -Samy Bengio, Johnny Mariethoz) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories -America and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef ROARING_ISADETECTION_H -#define ROARING_ISADETECTION_H - -#include <stdint.h> -#include <stdbool.h> -#include <stdlib.h> -#if defined(_MSC_VER) -#include <intrin.h> -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) -#include <cpuid.h> -#endif // defined(_MSC_VER) - - -enum croaring_instruction_set { - CROARING_DEFAULT = 0x0, - CROARING_NEON = 0x1, - CROARING_AVX2 = 0x4, - CROARING_SSE42 = 0x8, - CROARING_PCLMULQDQ = 0x10, - CROARING_BMI1 = 0x20, - CROARING_BMI2 = 0x40, - CROARING_ALTIVEC = 0x80, - CROARING_UNINITIALIZED = 0x8000 -}; - -#if defined(__PPC64__) - -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_ALTIVEC; -} - -#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 - -#if defined(__ARM_NEON) - -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_NEON; -} - -#else // ARM without NEON - -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_DEFAULT; -} - -#endif - -#elif defined(__x86_64__) || defined(_M_AMD64) // x64 - - - - -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, - uint32_t *edx) { - -#if defined(_MSC_VER) - int cpu_info[4]; - __cpuid(cpu_info, *eax); - *eax = cpu_info[0]; - *ebx = cpu_info[1]; - *ecx = cpu_info[2]; - *edx = cpu_info[3]; -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - __get_cpuid(level, eax, ebx, ecx, edx); -#else - uint32_t a = *eax, b, c = *ecx, d; - __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; -#endif -} - -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - uint32_t eax, ebx, ecx, edx; - uint32_t host_isa = 0x0; - // Can be found on Intel ISA Reference for CPUID - static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 - static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 - static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 - static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 - static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 - // ECX for EAX=0x7 - eax = 0x7; - ecx = 0x0; - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx & cpuid_avx2_bit) { - host_isa |= CROARING_AVX2; - } - if (ebx & cpuid_bmi1_bit) { - host_isa |= CROARING_BMI1; - } - - if (ebx & cpuid_bmi2_bit) { - host_isa |= CROARING_BMI2; - } - - // EBX for EAX=0x1 - eax = 0x1; - cpuid(&eax, &ebx, &ecx, &edx); - - if (ecx & cpuid_sse42_bit) { - host_isa |= CROARING_SSE42; - } - - if (ecx & cpuid_pclmulqdq_bit) { - host_isa |= CROARING_PCLMULQDQ; - } - - return host_isa; -} -#else // fallback - - -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_DEFAULT; -} - - -#endif // end SIMD extension detection code - - -#if defined(__x86_64__) || defined(_M_AMD64) // x64 - -#if 
defined(__cplusplus) -#include <atomic> -static inline uint32_t croaring_detect_supported_architectures() { - static std::atomic<int> buffer{CROARING_UNINITIALIZED}; - if(buffer == CROARING_UNINITIALIZED) { - buffer = dynamic_croaring_detect_supported_architectures(); - } - return buffer; -} -#elif defined(_MSC_VER) && !defined(__clang__) -// Visual Studio does not support C11 atomics. -static inline uint32_t croaring_detect_supported_architectures() { - static int buffer = CROARING_UNINITIALIZED; - if(buffer == CROARING_UNINITIALIZED) { - buffer = dynamic_croaring_detect_supported_architectures(); - } - return buffer; -} -#else // defined(__cplusplus) and defined(_MSC_VER) && !defined(__clang__) -#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5) -#define ROARING_DISABLE_AVX -#undef __AVX2__ -/* CentOS 7 */ -static inline uint32_t croaring_detect_supported_architectures() { - return(dynamic_croaring_detect_supported_architectures()); -} -#else -#include <stdatomic.h> -static inline uint32_t croaring_detect_supported_architectures() { - static _Atomic int buffer = CROARING_UNINITIALIZED; - if(buffer == CROARING_UNINITIALIZED) { - buffer = dynamic_croaring_detect_supported_architectures(); - } - return buffer; -} -#endif // (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5) -#endif // defined(_MSC_VER) && !defined(__clang__) - -#ifdef ROARING_DISABLE_AVX -static inline bool croaring_avx2() { - return false; -} -#elif defined(__AVX2__) -static inline bool croaring_avx2() { - return true; -} -#else -static inline bool croaring_avx2() { - return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2; -} -#endif - - -#else // defined(__x86_64__) || defined(_M_AMD64) // x64 - -static inline bool croaring_avx2() { - return false; -} - -static inline uint32_t croaring_detect_supported_architectures() { - // no runtime dispatch - return dynamic_croaring_detect_supported_architectures(); -} -#endif // defined(__x86_64__) || defined(_M_AMD64) // x64 - -#endif // ROARING_ISADETECTION_H -/* end file include/roaring/isadetection.h */ /* begin file include/roaring/portability.h */ /* * portability.h * */ + /** + * All macros should be prefixed with either CROARING or ROARING. + * The library uses both ROARING_... + * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for + * macros that are provided by the build system or that are closely + * related to the format. The header macros may also use ROARING_. + * The CROARING_ prefix is for internal macros that a user is unlikely + * to ever interact with. + */ + #ifndef INCLUDE_PORTABILITY_H_ #define INCLUDE_PORTABILITY_H_ #ifndef _GNU_SOURCE -#define _GNU_SOURCE +#define _GNU_SOURCE 1 #endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS -#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) +#ifdef _MSC_VER +#define CROARING_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. 
+ */ +#ifdef __clang__ +// clang under visual studio +#define CROARING_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define CROARING_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE +#endif + +#ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) @@ -292,7 +118,7 @@ static inline uint32_t croaring_detect_supported_architectures() { #include <stdbool.h> #include <stdint.h> #include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above -#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) +#ifdef __GLIBC__ #include <malloc.h> // this should never be needed but there are some reports that it is needed. #endif @@ -300,7 +126,7 @@ static inline uint32_t croaring_detect_supported_architectures() { extern "C" { // portability definitions are in global scope, not a namespace #endif -#if defined(_MSC_VER) && !defined(__clang__) && !defined(WIN64) && !defined(ROARING_ACK_32BIT) +#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT) #pragma message( \ "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.") #endif @@ -309,41 +135,72 @@ extern "C" { // portability definitions are in global scope, not a namespace #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. #endif -#if defined(_MSC_VER) +#if CROARING_REGULAR_VISUAL_STUDIO #define __restrict__ __restrict -#endif // defined(_MSC_VER +#endif // CROARING_REGULAR_VISUAL_STUDIO #if defined(__x86_64__) || defined(_M_X64) // we have an x64 processor -#define CROARING_IS_X64 +#define CROARING_IS_X64 1 #if defined(_MSC_VER) && (_MSC_VER < 1910) // Old visual studio systems won't support AVX2 well. #undef CROARING_IS_X64 #endif -#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5) - /* RH 7 don't have atomic includes */ -#undef CROARING_IS_X64 -#endif - - #if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__) // Older versions of clang have a bug affecting us // https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces #undef CROARING_IS_X64 #endif -#ifdef ROARING_DISABLE_X64 +#ifdef CROARING_DISABLE_X64 #undef CROARING_IS_X64 #endif // we include the intrinsic header -#ifndef _MSC_VER +#if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler */ #include <x86intrin.h> // on some recent GCC, this will declare posix_memalign -#endif // _MSC_VER + + + +#ifdef CROARING_CLANG_VISUAL_STUDIO + +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. These headers would + * normally guard against such usage, but we carefully included + * <x86intrin.h> (or <intrin.h>) before, so the headers + * are fooled. 
+ */ +#include <bmiintrin.h> // for _blsr_u64 +#include <lzcntintrin.h> // for __lzcnt64 +#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64) +#include <smmintrin.h> +#include <tmmintrin.h> +#include <avxintrin.h> +#include <avx2intrin.h> +#include <wmmintrin.h> +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + + +#endif // CROARING_REGULAR_VISUAL_STUDIO #endif // defined(__x86_64__) || defined(_M_X64) #if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) @@ -353,14 +210,13 @@ extern "C" { // portability definitions are in global scope, not a namespace # include <arm_neon.h> #endif -#ifndef _MSC_VER +#if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline * assembly */ -#define ROARING_INLINE_ASM +#define CROARING_INLINE_ASM 1 #endif // _MSC_VER - -#ifdef _MSC_VER +#if CROARING_REGULAR_VISUAL_STUDIO /* Microsoft C/C++-compatible compiler */ #include <intrin.h> @@ -371,9 +227,9 @@ extern "C" { // portability definitions are in global scope, not a namespace /* wrappers for Visual Studio built-ins that look like gcc built-ins */ /* result might be undefined when input_num is zero */ -static inline int __builtin_ctzll(unsigned long long input_num) { +inline int __builtin_ctzll(unsigned long long input_num) { unsigned long index; -#ifdef WIN64 // highly recommended!!! +#ifdef _WIN64 // highly recommended!!! _BitScanForward64(&index, input_num); #else // if we must support 32-bit Windows if ((uint32_t)input_num != 0) { @@ -387,9 +243,9 @@ static inline int __builtin_ctzll(unsigned long long input_num) { } /* result might be undefined when input_num is zero */ -static inline int __builtin_clzll(unsigned long long input_num) { +inline int __builtin_clzll(unsigned long long input_num) { unsigned long index; -#ifdef WIN64 // highly recommended!!! +#ifdef _WIN64 // highly recommended!!! _BitScanReverse64(&index, input_num); #else // if we must support 32-bit Windows if (input_num > 0xFFFFFFFF) { @@ -422,45 +278,16 @@ static inline int __builtin_clzll(unsigned long long input_num) { #endif -// without the following, we get lots of warnings about posix_memalign -#ifndef __cplusplus -extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); -#endif //__cplusplus // C++ does not have a well defined signature - -// portable version of posix_memalign -static inline void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { - void *p; -#ifdef _MSC_VER - p = _aligned_malloc(size, alignment); -#elif defined(__MINGW32__) || defined(__MINGW64__) - p = __mingw_aligned_malloc(size, alignment); -#else - // somehow, if this is used before including "x86intrin.h", it creates an - // implicit defined warning. - if (posix_memalign(&p, alignment, size) != 0) return NULL; -#endif - return p; -} - -static inline void roaring_bitmap_aligned_free(void *memblock) { -#ifdef _MSC_VER - _aligned_free(memblock); -#elif defined(__MINGW32__) || defined(__MINGW64__) - __mingw_aligned_free(memblock); -#else - ndpi_free(memblock); -#endif -} - -#if defined(_MSC_VER) +#if CROARING_REGULAR_VISUAL_STUDIO #define ALIGNED(x) __declspec(align(x)) -#else -#if defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) #define ALIGNED(x) __attribute__((aligned(x))) -#endif +#else +#warning "Warning. Unrecognized compiler." 
+#define ALIGNED(x) #endif -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define WARN_UNUSED __attribute__((warn_unused_result)) #else #define WARN_UNUSED @@ -468,6 +295,10 @@ static inline void roaring_bitmap_aligned_free(void *memblock) { #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) +#ifdef USENEON +// we can always compute the popcount fast. +#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO) +// we will need this function: static inline int hammingbackup(uint64_t x) { uint64_t c1 = UINT64_C(0x5555555555555555); uint64_t c2 = UINT64_C(0x3333333333333333); @@ -477,16 +308,20 @@ static inline int hammingbackup(uint64_t x) { x *= UINT64_C(0x0101010101010101); return x >> 56; } +#endif + static inline int hamming(uint64_t x) { -#if defined(WIN64) && defined(_MSC_VER) && !defined(__clang__) -#ifdef _M_ARM64 +#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO +#ifdef USENEON + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +#elif defined(_M_ARM64) return hammingbackup(x); // (int) _CountOneBits64(x); is unavailable #else // _M_ARM64 return (int) __popcnt64(x); #endif // _M_ARM64 -#elif defined(WIN32) && defined(_MSC_VER) && !defined(__clang__) +#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO #ifdef _M_ARM return hammingbackup(x); // _CountOneBits is unavailable @@ -568,8 +403,287 @@ static inline int hamming(uint64_t x) { #define CROARING_UNTARGET_REGION #endif +// Allow unaligned memory access +#if defined(__GNUC__) || defined(__clang__) +#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) +#else +#define ALLOW_UNALIGNED +#endif + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #elif defined(_WIN32) + #define CROARING_IS_BIG_ENDIAN 0 + #else + #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + #include <machine/endian.h> + #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) + #include <sys/byteorder.h> + #else // defined(__APPLE__) || defined(__FreeBSD__) + + #ifdef __has_include + #if __has_include(<endian.h>) + #include <endian.h> + #endif //__has_include(<endian.h>) + #endif //__has_include + + #endif // defined(__APPLE__) || defined(__FreeBSD__) + + + #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) + #define CROARING_IS_BIG_ENDIAN 0 + #endif + + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define CROARING_IS_BIG_ENDIAN 0 + #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define CROARING_IS_BIG_ENDIAN 1 + #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#endif + +// We need portability.h to be included first, +// but we also always want isadetection.h to be +// included (right after). +// See https://github.com/RoaringBitmap/CRoaring/issues/394 +// There is no scenario where we want portability.h to +// be included, but not isadetection.h: the latter is a +// strict requirement. #endif /* INCLUDE_PORTABILITY_H_ */ /* end file include/roaring/portability.h */ +/* begin file include/roaring/isadetection.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ROARING_ISADETECTION_H +#define ROARING_ISADETECTION_H + +// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H). + +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> + +// We need portability.h to be included first, see +// https://github.com/RoaringBitmap/CRoaring/issues/394 +#if CROARING_REGULAR_VISUAL_STUDIO +#include <intrin.h> +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include <cpuid.h> +#endif // CROARING_REGULAR_VISUAL_STUDIO + + +enum croaring_instruction_set { + CROARING_DEFAULT = 0x0, + CROARING_NEON = 0x1, + CROARING_AVX2 = 0x4, + CROARING_SSE42 = 0x8, + CROARING_PCLMULQDQ = 0x10, + CROARING_BMI1 = 0x20, + CROARING_BMI2 = 0x40, + CROARING_ALTIVEC = 0x80, + CROARING_UNINITIALIZED = 0x8000 +}; + +#if defined(__PPC64__) + +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_ALTIVEC; +//} + +#elif defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 + +#if defined(__ARM_NEON) + +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_NEON; +//} + +#else // ARM without NEON + +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_DEFAULT; +//} + +#endif + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + + + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { + +#if CROARING_REGULAR_VISUAL_STUDIO + int cpu_info[4]; + __cpuid(cpu_info, *eax); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + __get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif +} + +static inline uint32_t dynamic_croaring_detect_supported_architectures() { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + // Can be found on Intel ISA Reference for CPUID + static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 + static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 + static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 + static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 + static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= CROARING_AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |= CROARING_BMI1; + } + + if (ebx & cpuid_bmi2_bit) { + host_isa |= CROARING_BMI2; + } + + // EBX for EAX=0x1 + eax = 0x1; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_sse42_bit) { + host_isa |= CROARING_SSE42; + } + + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= CROARING_PCLMULQDQ; + } + + return host_isa; +} +#else // fallback + + +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_DEFAULT; +//} + + +#endif // end SIMD extension detection code + + +#if defined(__x86_64__) || defined(_M_AMD64) // x64 + +#if defined(__cplusplus) +static inline uint32_t croaring_detect_supported_architectures() { + // thread-safe as per the C++11 standard. + static uint32_t buffer = dynamic_croaring_detect_supported_architectures(); + return buffer; +} +#elif CROARING_VISUAL_STUDIO +// Visual Studio does not support C11 atomics. 
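
dynamic_croaring_detect_supported_architectures() above reads CPUID leaf 7 (EBX bits: 5 for AVX2, 3 for BMI1, 8 for BMI2) and leaf 1 (ECX bits: 20 for SSE4.2, 1 for PCLMULQDQ). A minimal standalone sketch of the same checks, assuming GCC or Clang on x86-64 and the <cpuid.h> helpers:

#include <cpuid.h>
#include <stdio.h>

int main(void) {
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 7, subleaf 0: EBX bit 5 = AVX2, bit 3 = BMI1, bit 8 = BMI2. */
    if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        printf("AVX2:      %s\n", (ebx & (1u << 5)) ? "yes" : "no");
        printf("BMI1:      %s\n", (ebx & (1u << 3)) ? "yes" : "no");
        printf("BMI2:      %s\n", (ebx & (1u << 8)) ? "yes" : "no");
    }

    /* Leaf 1: ECX bit 20 = SSE4.2, bit 1 = PCLMULQDQ. */
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        printf("SSE4.2:    %s\n", (ecx & (1u << 20)) ? "yes" : "no");
        printf("PCLMULQDQ: %s\n", (ecx & (1u << 1)) ? "yes" : "no");
    }
    return 0;
}
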
+static inline uint32_t croaring_detect_supported_architectures() { + static int buffer = CROARING_UNINITIALIZED; + if (buffer == CROARING_UNINITIALIZED) { + buffer = dynamic_croaring_detect_supported_architectures(); + } + return buffer; +} +#else // CROARING_VISUAL_STUDIO +#include <stdatomic.h> +static inline uint32_t croaring_detect_supported_architectures() { + // we use an atomic for thread safety + static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; + if (buffer == CROARING_UNINITIALIZED) { + // atomicity is sufficient + buffer = dynamic_croaring_detect_supported_architectures(); + } + return buffer; +} +#endif // CROARING_REGULAR_VISUAL_STUDIO + +#ifdef ROARING_DISABLE_AVX +static inline bool croaring_avx2() { + return false; +} +#elif defined(__AVX2__) +static inline bool croaring_avx2() { + return true; +} +#else +static inline bool croaring_avx2() { + return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2; +} +#endif + + +#else // defined(__x86_64__) || defined(_M_AMD64) // x64 + +//static inline bool croaring_avx2() { +// return false; +//} + +//static inline uint32_t croaring_detect_supported_architectures() { +// // no runtime dispatch +// return dynamic_croaring_detect_supported_architectures(); +//} +#endif // defined(__x86_64__) || defined(_M_AMD64) // x64 + +#endif // ROARING_ISADETECTION_H +/* end file include/roaring/isadetection.h */ /* begin file include/roaring/containers/perfparameters.h */ #ifndef PERFPARAMETERS_H_ #define PERFPARAMETERS_H_ @@ -682,10 +796,10 @@ typedef ROARING_CONTAINER_T container_t; * downcast; only a static_cast<> is needed. Define a macro for static casting * which helps make casts more visible, and catches problems at compile-time * when building the C sources in C++ mode: - * + * * void some_func(container_t **c, ...) { // double pointer, not single * array_container_t *ac1 = (array_container_t *)(c); // uncaught!! - * + * * array_container_t *ac2 = CAST(array_container_t *, c) // C++ errors * array_container_t *ac3 = CAST_array(c); // shorthand for #2, errors * } @@ -694,7 +808,7 @@ typedef ROARING_CONTAINER_T container_t; * needs a reinterpret_cast<>, which sacrifices safety...so a template is used * leveraging <type_traits> to make sure it's legal in the C++ build. */ -#ifdef __cplusplus +#ifdef __cplusplus #define CAST(type,value) static_cast<type>(value) #define movable_CAST(type,value) movable_CAST_HELPER<type>(value) @@ -744,7 +858,7 @@ extern "C" { namespace roaring { namespace internal { * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) * keys the array sorted. */ -static inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, +inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, uint16_t ikey) { int32_t low = 0; int32_t high = lenarray - 1; @@ -847,121 +961,121 @@ static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, * C should have capacity greater than the minimum of s_1 and s_b + 8 * where 8 is sizeof(__m128i)/sizeof(uint16_t). 
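
binarySearch() above follows the Java-style convention spelled out in its comment: a non-negative return is the index of the key, and a negative return encodes the insertion point as -(pos) - 1, so callers can tell both whether and where a value belongs. A small self-contained illustration (names are the editor's):

#include <stdint.h>
#include <assert.h>

static int32_t binary_search_u16(const uint16_t *array, int32_t len, uint16_t ikey) {
    int32_t low = 0, high = len - 1;
    while (low <= high) {
        int32_t middle = (low + high) >> 1;
        uint16_t v = array[middle];
        if (v < ikey)      low = middle + 1;
        else if (v > ikey) high = middle - 1;
        else               return middle;           /* found */
    }
    return -(low + 1);                              /* -(insertion point) - 1 */
}

int main(void) {
    const uint16_t sorted[] = {2, 5, 9, 14, 200};
    assert(binary_search_u16(sorted, 5, 9) == 2);   /* present at index 2 */
    assert(binary_search_u16(sorted, 5, 10) == -4); /* would insert at index 3 */
    assert(binary_search_u16(sorted, 5, 1) == -1);  /* would insert at index 0 */
    return 0;
}
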
*/ -static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b, uint16_t *C); /** * Compute the cardinality of the intersection using SSE4 instructions */ -static int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b); /* Computes the intersection between one small and one large set of uint16_t. * Stores the result into buffer and return the number of elements. */ -static int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, +int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, const uint16_t *largearray, size_t size_l, uint16_t *buffer); /* Computes the size of the intersection between one small and one large set of * uint16_t. */ -static int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, +int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, size_t size_s, const uint16_t *largearray, size_t size_l); /* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ -static bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, +bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, const uint16_t *largearray, size_t size_l); /** * Generic intersection function. */ -static int32_t intersect_uint16(const uint16_t *A, const size_t lenA, +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB, uint16_t *out); /** * Compute the size of the intersection (generic). */ -static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB); /** * Checking whether the size of the intersection is non-zero. */ -static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB); /** * Generic union function. */ -static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, size_t size_2, uint16_t *buffer); /** * Generic XOR function. */ -static int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, +int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, const uint16_t *array_2, int32_t card_2, uint16_t *out); /** * Generic difference function (ANDNOT). */ -static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, int length2, uint16_t *a_out); /** * Generic intersection function. */ -static size_t intersection_uint32(const uint32_t *A, const size_t lenA, +size_t intersection_uint32(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t *out); /** * Generic intersection function, returns just the cardinality. */ -static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB); /** * Generic union function. 
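
The intersection helpers declared above come in vectorized (intersect_vector16), skewed small-versus-large (intersect_skewed_uint16) and generic flavours. The generic case is essentially the classic two-pointer walk over two sorted arrays; a minimal sketch:

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

static int32_t intersect_sorted_u16(const uint16_t *a, size_t lena,
                                    const uint16_t *b, size_t lenb,
                                    uint16_t *out) {
    size_t i = 0, j = 0;
    int32_t k = 0;
    while (i < lena && j < lenb) {
        if (a[i] < b[j])      i++;
        else if (b[j] < a[i]) j++;
        else { out[k++] = a[i]; i++; j++; }  /* common value */
    }
    return k;
}

int main(void) {
    const uint16_t a[] = {1, 3, 7, 9, 1000};
    const uint16_t b[] = {3, 4, 9, 1000, 2000};
    uint16_t out[5];
    int32_t n = intersect_sorted_u16(a, 5, b, 5, out);
    assert(n == 3 && out[0] == 3 && out[1] == 9 && out[2] == 1000);
    return 0;
}
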
*/ -static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, size_t size_2, uint32_t *buffer); /** * A fast SSE-based union function. */ -static uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, +uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, const uint16_t *__restrict__ set_2, uint32_t size_2, uint16_t *__restrict__ buffer); /** * A fast SSE-based XOR function. */ -static uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, const uint16_t *__restrict__ array2, uint32_t length2, uint16_t *__restrict__ output); /** * A fast SSE-based difference function. */ -static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b, uint16_t *C); /** * Generic union function, returns just the cardinality. */ -static size_t union_uint32_card(const uint32_t *set_1, size_t size_1, +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, size_t size_2); /** * combines union_uint16 and union_vector16 optimally */ -static size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, size_t size_2, uint16_t *buffer); -static bool memequals(const void *s1, const void *s2, size_t n); +bool memequals(const void *s1, const void *s2, size_t n); #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { @@ -983,7 +1097,7 @@ static bool memequals(const void *s1, const void *s2, size_t n); extern "C" { namespace roaring { #endif -#if defined(ROARING_INLINE_ASM) +#if defined(CROARING_INLINE_ASM) #define CROARING_ASMBITMANIPOPTIMIZATION // optimization flag #define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ @@ -1073,7 +1187,7 @@ static inline void bitset_set_range(uint64_t *words, uint32_t start, return; } words[firstword] |= (~UINT64_C(0)) << (start % 64); - uint32_t i; for (i = firstword + 1; i < endword; i++) { + for (uint32_t i = firstword + 1; i < endword; i++) { words[i] = ~UINT64_C(0); } words[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); @@ -1094,7 +1208,7 @@ static inline int bitset_lenrange_cardinality(const uint64_t *words, << (start % 64)); } int answer = hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64))); - uint32_t i; for (i = firstword + 1; i < endword; i++) { + for (uint32_t i = firstword + 1; i < endword; i++) { answer += hamming(words[i]); } answer += @@ -1117,7 +1231,7 @@ static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start, if (((words[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) { return false; } - uint32_t i; for (i = firstword + 1; i < endword; i++) { + for (uint32_t i = firstword + 1; i < endword; i++) { if (words[i] != 0) { return false; } @@ -1143,7 +1257,7 @@ static inline void bitset_set_lenrange(uint64_t *words, uint32_t start, } uint64_t temp = words[endword]; words[firstword] |= (~UINT64_C(0)) << (start % 64); - uint32_t i; for (i = firstword + 1; i < endword; i += 2) + for (uint32_t i = firstword + 1; i < endword; i += 2) words[i] = words[i + 1] = ~UINT64_C(0); words[endword] = temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); @@ -1158,7 +1272,7 @@ static inline void 
bitset_flip_range(uint64_t *words, uint32_t start, uint32_t firstword = start / 64; uint32_t endword = (end - 1) / 64; words[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); - uint32_t i; for (i = firstword; i < endword; i++) { + for (uint32_t i = firstword; i < endword; i++) { words[i] = ~words[i]; } words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); @@ -1178,7 +1292,7 @@ static inline void bitset_reset_range(uint64_t *words, uint32_t start, return; } words[firstword] &= ~((~UINT64_C(0)) << (start % 64)); - uint32_t i; for (i = firstword + 1; i < endword; i++) { + for (uint32_t i = firstword + 1; i < endword; i++) { words[i] = UINT64_C(0); } words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); @@ -1199,7 +1313,7 @@ static inline void bitset_reset_range(uint64_t *words, uint32_t start, * * This function uses AVX2 decoding. */ -static size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, +size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, uint32_t *out, size_t outcapacity, uint32_t base); @@ -1212,7 +1326,7 @@ static size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, * * Returns how many values were actually decoded. */ -static size_t bitset_extract_setbits(const uint64_t *words, size_t length, +size_t bitset_extract_setbits(const uint64_t *words, size_t length, uint32_t *out, uint32_t base); /* @@ -1231,7 +1345,7 @@ static size_t bitset_extract_setbits(const uint64_t *words, size_t length, * * This function uses SSE decoding. */ -static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, uint16_t *out, size_t outcapacity, uint16_t base); @@ -1245,7 +1359,7 @@ static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t le * * Returns how many values were actually decoded. */ -static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, uint16_t *out, uint16_t base); /* @@ -1258,7 +1372,7 @@ static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length * * Returns how many values were actually decoded. */ -static size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, const uint64_t * __restrict__ words2, size_t length, uint16_t *out, uint16_t base); @@ -1269,13 +1383,13 @@ static size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __rest * and return the updated cardinality. This evidently assumes that the bitset * already contained data. */ -static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, const uint16_t *list, uint64_t length); /* * Given a bitset, set all bit values in the list (there * are length of them). */ -static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length); +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length); /* * Given a bitset having cardinality card, unset all bit values in the list @@ -1283,7 +1397,7 @@ static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t leng * and return the updated cardinality. This evidently assumes that the bitset * already contained data. 
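
The bitset_extract_setbits() family declared above decodes a bitset back into explicit values. The scalar idea is a count-trailing-zeros loop combined with the clear-lowest-set-bit step; the AVX2/SSE declarations above are the optimized equivalents. A sketch assuming GCC/Clang builtins:

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

static size_t extract_setbits(const uint64_t *words, size_t length,
                              uint32_t *out, uint32_t base) {
    size_t outpos = 0;
    for (size_t i = 0; i < length; i++) {
        uint64_t w = words[i];
        while (w != 0) {
            out[outpos++] = base + (uint32_t)(i * 64) + (uint32_t)__builtin_ctzll(w);
            w &= w - 1;  /* same as _blsr_u64: drop the bit just emitted */
        }
    }
    return outpos;
}

int main(void) {
    uint64_t words[2] = { (UINT64_C(1) << 3) | (UINT64_C(1) << 60), UINT64_C(1) };
    uint32_t out[3];
    size_t n = extract_setbits(words, 2, out, 1000);
    assert(n == 3 && out[0] == 1003 && out[1] == 1060 && out[2] == 1064);
    return 0;
}
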
*/ -static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, uint64_t length); /* @@ -1293,10 +1407,10 @@ static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t * already contained data. */ -static uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, const uint16_t *list, uint64_t length); -static void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length); +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length); #ifdef CROARING_IS_X64 /*** @@ -1354,7 +1468,7 @@ CROARING_TARGET_AVX2 /** * Fast Harley-Seal AVX population count function */ -static inline uint64_t avx2_harley_seal_popcount256(const __m256i *data, +inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data, const uint64_t size) { __m256i total = _mm256_setzero_si256(); __m256i ones = _mm256_setzero_si256(); @@ -1676,27 +1790,28 @@ typedef struct array_container_s array_container_t; /* Create a new array with default. Return NULL in case of failure. See also * array_container_create_given_capacity. */ -static array_container_t *array_container_create(void); +array_container_t *array_container_create(void); /* Create a new array with a specified capacity size. Return NULL in case of * failure. */ -static array_container_t *array_container_create_given_capacity(int32_t size); +array_container_t *array_container_create_given_capacity(int32_t size); /* Create a new array containing all values in [min,max). */ -static array_container_t * array_container_create_range(uint32_t min, uint32_t max); +array_container_t * array_container_create_range(uint32_t min, uint32_t max); /* * Shrink the capacity to the actual size, return the number of bytes saved. */ -static int array_container_shrink_to_fit(array_container_t *src); +int array_container_shrink_to_fit(array_container_t *src); /* Free memory owned by `array'. */ -static void array_container_free(array_container_t *array); +void array_container_free(array_container_t *array); /* Duplicate container */ -static array_container_t *array_container_clone(const array_container_t *src); +array_container_t *array_container_clone(const array_container_t *src); /* Get the cardinality of `array'. */ +ALLOW_UNALIGNED static inline int array_container_cardinality(const array_container_t *array) { return array->cardinality; } @@ -1707,18 +1822,14 @@ static inline bool array_container_nonzero_cardinality( } /* Copy one container into another. We assume that they are distinct. */ -static void array_container_copy(const array_container_t *src, array_container_t *dst); +void array_container_copy(const array_container_t *src, array_container_t *dst); /* Add all the values in [min,max) (included) at a distance k*step from min. The container must have a size less or equal to DEFAULT_MAX_SIZE after this addition. */ -static void array_container_add_from_range(array_container_t *arr, uint32_t min, +void array_container_add_from_range(array_container_t *arr, uint32_t min, uint32_t max, uint16_t step); -/* Set the cardinality to zero (does not release memory). 
*/ -static inline void array_container_clear(array_container_t *array) { - array->cardinality = 0; -} static inline bool array_container_empty(const array_container_t *array) { return array->cardinality == 0; @@ -1733,35 +1844,35 @@ static inline bool array_container_full(const array_container_t *array) { /* Compute the union of `src_1' and `src_2' and write the result to `dst' * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -static void array_container_union(const array_container_t *src_1, +void array_container_union(const array_container_t *src_1, const array_container_t *src_2, array_container_t *dst); /* symmetric difference, see array_container_union */ -static void array_container_xor(const array_container_t *array_1, +void array_container_xor(const array_container_t *array_1, const array_container_t *array_2, array_container_t *out); /* Computes the intersection of src_1 and src_2 and write the result to * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -static void array_container_intersection(const array_container_t *src_1, +void array_container_intersection(const array_container_t *src_1, const array_container_t *src_2, array_container_t *dst); /* Check whether src_1 and src_2 intersect. */ -static bool array_container_intersect(const array_container_t *src_1, +bool array_container_intersect(const array_container_t *src_1, const array_container_t *src_2); /* computers the size of the intersection between two arrays. */ -static int array_container_intersection_cardinality(const array_container_t *src_1, +int array_container_intersection_cardinality(const array_container_t *src_1, const array_container_t *src_2); /* computes the intersection of array1 and array2 and write the result to * array1. * */ -static void array_container_intersection_inplace(array_container_t *src_1, +void array_container_intersection_inplace(array_container_t *src_1, const array_container_t *src_2); /* @@ -1772,22 +1883,22 @@ static void array_container_intersection_inplace(array_container_t *src_1, * The function returns the number of values written. * The caller is responsible for allocating enough memory in out. */ -static int array_container_to_uint32_array(void *vout, const array_container_t *cont, +int array_container_to_uint32_array(void *vout, const array_container_t *cont, uint32_t base); /* Compute the number of runs */ -static int32_t array_container_number_of_runs(const array_container_t *ac); +int32_t array_container_number_of_runs(const array_container_t *ac); /* * Print this container using printf (useful for debugging). */ -static void array_container_printf(const array_container_t *v); +void array_container_printf(const array_container_t *v); /* * Print this container using printf as a comma-separated list of 32-bit * integers starting at base. */ -static void array_container_printf_as_uint32_array(const array_container_t *v, +void array_container_printf_as_uint32_array(const array_container_t *v, uint32_t base); /** @@ -1803,12 +1914,12 @@ static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { * parameter. If preserve is false, then the new content will be uninitialized, * otherwise the old content is copied. 
*/ -static void array_container_grow(array_container_t *container, int32_t min, +void array_container_grow(array_container_t *container, int32_t min, bool preserve); -static bool array_container_iterate(const array_container_t *cont, uint32_t base, +bool array_container_iterate(const array_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr); -static bool array_container_iterate64(const array_container_t *cont, uint32_t base, +bool array_container_iterate64(const array_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); @@ -1820,7 +1931,7 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba * array_container_size_in_bytes(container). * */ -static int32_t array_container_write(const array_container_t *container, char *buf); +int32_t array_container_write(const array_container_t *container, char *buf); /** * Reads the instance from buf, outputs how many bytes were read. * This is meant to be byte-by-byte compatible with the Java and Go versions of @@ -1828,7 +1939,7 @@ static int32_t array_container_write(const array_container_t *container, char *b * The number of bytes read should be array_container_size_in_bytes(container). * You need to provide the (known) cardinality. */ -static int32_t array_container_read(int32_t cardinality, array_container_t *container, +int32_t array_container_read(int32_t cardinality, array_container_t *container, const char *buf); /** @@ -1847,6 +1958,7 @@ static inline int32_t array_container_size_in_bytes( /** * Return true if the two arrays have the same content. */ +ALLOW_UNALIGNED static inline bool array_container_equals( const array_container_t *container1, const array_container_t *container2) { @@ -1860,7 +1972,7 @@ static inline bool array_container_equals( /** * Return true if container1 is a subset of container2. */ -static bool array_container_is_subset(const array_container_t *container1, +bool array_container_is_subset(const array_container_t *container1, const array_container_t *container2); /** @@ -1886,7 +1998,7 @@ static inline bool array_container_select(const array_container_t *container, * to array out. * Array out does not need to be distinct from array_1 */ -static void array_container_andnot(const array_container_t *array_1, +void array_container_andnot(const array_container_t *array_1, const array_container_t *array_2, array_container_t *out); @@ -1960,7 +2072,7 @@ static inline bool array_container_remove(array_container_t *arr, } /* Check whether x is present. */ -static inline bool array_container_contains(const array_container_t *arr, +inline bool array_container_contains(const array_container_t *arr, uint16_t pos) { // return binarySearch(arr->array, arr->cardinality, pos) >= 0; // binary search with fallback to linear search for short ranges @@ -1980,8 +2092,7 @@ static inline bool array_container_contains(const array_container_t *arr, } } - int i; - for (i=low; i <= high; i++) { + for (int i=low; i <= high; i++) { uint16_t v = carr[i]; if (v == pos) { return true; @@ -1992,37 +2103,47 @@ static inline bool array_container_contains(const array_container_t *arr, } +void array_container_offset(const array_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset); + //* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
*/ static inline bool array_container_contains_range(const array_container_t *arr, uint32_t range_start, uint32_t range_end) { - + const int32_t range_count = range_end - range_start; const uint16_t rs_included = range_start; const uint16_t re_included = range_end - 1; - const uint16_t *carr = (const uint16_t *) arr->array; - - const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); - const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); + // Empty range is always included + if (range_count <= 0) { + return true; + } + if (range_count > arr->cardinality) { + return false; + } - return (start < arr->cardinality) && (end < arr->cardinality) - && (((uint16_t)(end - start)) == re_included - rs_included) - && (carr[start] == rs_included) && (carr[end] == re_included); + const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included); + // If this sorted array contains all items in the range: + // * the start item must be found + // * the last item in range range_count must exist, and be the expected end value + return (start >= 0) && (arr->cardinality >= start + range_count) && + (arr->array[start + range_count - 1] == re_included); } /* Returns the smallest value (assumes not empty) */ -static inline uint16_t array_container_minimum(const array_container_t *arr) { +inline uint16_t array_container_minimum(const array_container_t *arr) { if (arr->cardinality == 0) return 0; return arr->array[0]; } /* Returns the largest value (assumes not empty) */ -static inline uint16_t array_container_maximum(const array_container_t *arr) { +inline uint16_t array_container_maximum(const array_container_t *arr) { if (arr->cardinality == 0) return 0; return arr->array[arr->cardinality - 1]; } /* Returns the number of values equal or smaller than x */ -static inline int array_container_rank(const array_container_t *arr, uint16_t x) { +inline int array_container_rank(const array_container_t *arr, uint16_t x) { const int32_t idx = binarySearch(arr->array, arr->cardinality, x); const bool is_present = idx >= 0; if (is_present) { @@ -2033,7 +2154,7 @@ static inline int array_container_rank(const array_container_t *arr, uint16_t x) } /* Returns the index of the first value equal or smaller than x, or -1 */ -static inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { +inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { const int32_t idx = binarySearch(arr->array, arr->cardinality, x); const bool is_present = idx >= 0; if (is_present) { @@ -2061,21 +2182,22 @@ static inline void array_container_add_range_nvals(array_container_t *array, memmove(&(array->array[union_cardinality - nvals_greater]), &(array->array[array->cardinality - nvals_greater]), nvals_greater * sizeof(uint16_t)); - uint32_t i; for (i = 0; i <= max - min; i++) { + for (uint32_t i = 0; i <= max - min; i++) { array->array[nvals_less + i] = min + i; } array->cardinality = union_cardinality; } /** - * Adds all values in range [min,max]. + * Adds all values in range [min,max]. This function is currently unused + * and left as a documentation. 
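
The rewritten array_container_contains_range() above leans on the fact that array containers hold sorted, distinct 16-bit values: the whole range [range_start, range_end) is present exactly when range_start is found at some index and the value range_count - 1 slots later equals range_end - 1, since there is no room left for gaps. A standalone sketch of the same argument on a plain sorted array (names are the editor's):

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

static int32_t find_index(const uint16_t *a, int32_t n, uint16_t key) {
    int32_t lo = 0, hi = n - 1;
    while (lo <= hi) {
        int32_t mid = (lo + hi) >> 1;
        if (a[mid] < key) lo = mid + 1;
        else if (a[mid] > key) hi = mid - 1;
        else return mid;
    }
    return -(lo + 1);
}

static bool contains_range(const uint16_t *a, int32_t n, uint32_t rs, uint32_t re_excl) {
    int32_t count = (int32_t)(re_excl - rs);
    if (count <= 0) return true;         /* empty range is trivially contained */
    if (count > n) return false;
    int32_t start = find_index(a, n, (uint16_t)rs);
    return start >= 0 && n >= start + count &&
           a[start + count - 1] == (uint16_t)(re_excl - 1);
}

int main(void) {
    const uint16_t arr[] = {1, 4, 5, 6, 7, 9};
    assert(contains_range(arr, 6, 4, 8));    /* 4,5,6,7 are all present */
    assert(!contains_range(arr, 6, 4, 10));  /* 8 is missing */
    return 0;
}
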
*/ -static inline void array_container_add_range(array_container_t *array, +/*static inline void array_container_add_range(array_container_t *array, uint32_t min, uint32_t max) { int32_t nvals_greater = count_greater(array->array, array->cardinality, max); int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); -} +}*/ /* * Removes all elements array[pos] .. array[pos+count-1] @@ -2138,24 +2260,24 @@ typedef struct bitset_container_s bitset_container_t; #define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c) /* Create a new bitset. Return NULL in case of failure. */ -static bitset_container_t *bitset_container_create(void); +bitset_container_t *bitset_container_create(void); /* Free memory. */ -static void bitset_container_free(bitset_container_t *bitset); +void bitset_container_free(bitset_container_t *bitset); /* Clear bitset (sets bits to 0). */ -static void bitset_container_clear(bitset_container_t *bitset); +void bitset_container_clear(bitset_container_t *bitset); /* Set all bits to 1. */ -static void bitset_container_set_all(bitset_container_t *bitset); +void bitset_container_set_all(bitset_container_t *bitset); /* Duplicate bitset */ -static bitset_container_t *bitset_container_clone(const bitset_container_t *src); +bitset_container_t *bitset_container_clone(const bitset_container_t *src); /* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow * and * should not be used in performance-sensitive code. Ever. */ -static void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, +void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, uint32_t end); #if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(__AVX2__) @@ -2171,8 +2293,8 @@ static inline void bitset_container_set(bitset_container_t *bitset, bitset->words[offset] = load; } -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, +/* Unset the ith bit. Currently unused. Could be used for optimization. */ +/*static inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos) { uint64_t shift = 6; uint64_t offset; @@ -2181,7 +2303,7 @@ static inline void bitset_container_unset(bitset_container_t *bitset, uint64_t load = bitset->words[offset]; ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); bitset->words[offset] = load; -} +}*/ /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower * than bitset_container_set. */ @@ -2216,7 +2338,7 @@ static inline bool bitset_container_remove(bitset_container_t *bitset, } /* Get the value of the ith bit. */ -static inline bool bitset_container_get(const bitset_container_t *bitset, +inline bool bitset_container_get(const bitset_container_t *bitset, uint16_t pos) { uint64_t word = bitset->words[pos >> 6]; const uint64_t p = pos; @@ -2236,15 +2358,15 @@ static inline void bitset_container_set(bitset_container_t *bitset, bitset->words[pos >> 6] = new_word; } -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, +/* Unset the ith bit. Currently unused. 
*/ +/*static inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos) { const uint64_t old_word = bitset->words[pos >> 6]; const int index = pos & 63; const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); bitset->words[pos >> 6] = new_word; -} +}*/ /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower * than bitset_container_set. */ @@ -2273,7 +2395,7 @@ static inline bool bitset_container_remove(bitset_container_t *bitset, } /* Get the value of the ith bit. */ -static inline bool bitset_container_get(const bitset_container_t *bitset, +inline bool bitset_container_get(const bitset_container_t *bitset, uint16_t pos) { const uint64_t word = bitset->words[pos >> 6]; return (word >> (pos & 63)) & 1; @@ -2302,7 +2424,7 @@ static inline bool bitset_container_get_range(const bitset_container_t *bitset, return false; } - uint32_t i; for (i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ + for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; } @@ -2311,7 +2433,7 @@ static inline bool bitset_container_get_range(const bitset_container_t *bitset, } /* Check whether `bitset' is present in `array'. Calls bitset_container_get. */ -static inline bool bitset_container_contains(const bitset_container_t *bitset, +inline bool bitset_container_contains(const bitset_container_t *bitset, uint16_t pos) { return bitset_container_get(bitset, pos); } @@ -2326,43 +2448,35 @@ static inline bool bitset_container_contains_range(const bitset_container_t *bit } /* Get the number of bits set */ +ALLOW_UNALIGNED static inline int bitset_container_cardinality( const bitset_container_t *bitset) { return bitset->cardinality; } + + + /* Copy one container into another. We assume that they are distinct. */ -static void bitset_container_copy(const bitset_container_t *source, +void bitset_container_copy(const bitset_container_t *source, bitset_container_t *dest); /* Add all the values [min,max) at a distance k*step from min: min, * min+step,.... */ -static void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, +void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, uint32_t max, uint16_t step); /* Get the number of bits set (force computation). This does not modify bitset. * To update the cardinality, you should do * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ -static int bitset_container_compute_cardinality(const bitset_container_t *bitset); +int bitset_container_compute_cardinality(const bitset_container_t *bitset); -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - when the cardinality is unknown, it is computed and stored in the struct */ -static inline bool bitset_container_nonzero_cardinality( - bitset_container_t *bitset) { - // account for laziness - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - // could bail early instead with a nonzero result - bitset->cardinality = bitset_container_compute_cardinality(bitset); - } - return bitset->cardinality > 0; -} - -/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), +/* Check whether this bitset is empty, * it never modifies the bitset struct. 
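
bitset_container_set() above (and the now commented-out bitset_container_unset()) keep the cardinality exact without branching: (old_word ^ new_word) >> index is 1 only when the targeted bit actually changed. A toy illustration, with structure and names invented by the editor:

#include <stdint.h>
#include <assert.h>

typedef struct { uint64_t words[1024]; int32_t cardinality; } toy_bitset;

static void toy_set(toy_bitset *b, uint16_t pos) {
    const uint64_t old_word = b->words[pos >> 6];
    const int index = pos & 63;
    const uint64_t new_word = old_word | (UINT64_C(1) << index);
    b->cardinality += (int32_t)((old_word ^ new_word) >> index); /* +1 only if newly set */
    b->words[pos >> 6] = new_word;
}

static void toy_unset(toy_bitset *b, uint16_t pos) {
    const uint64_t old_word = b->words[pos >> 6];
    const int index = pos & 63;
    const uint64_t new_word = old_word & ~(UINT64_C(1) << index);
    b->cardinality -= (int32_t)((old_word ^ new_word) >> index); /* -1 only if it was set */
    b->words[pos >> 6] = new_word;
}

int main(void) {
    static toy_bitset b;  /* zero-initialized */
    toy_set(&b, 100);
    toy_set(&b, 100);     /* duplicate: cardinality must not grow */
    toy_unset(&b, 7);     /* absent: cardinality must not shrink */
    assert(b.cardinality == 1);
    return 0;
}
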
*/ static inline bool bitset_container_empty( const bitset_container_t *bitset) { if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { if((bitset->words[i]) != 0) return false; } return true; @@ -2381,99 +2495,102 @@ static inline bool bitset_container_const_nonzero_cardinality( /* * Check whether the two bitsets intersect */ -static bool bitset_container_intersect(const bitset_container_t *src_1, +bool bitset_container_intersect(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the * cardinality. */ -static int bitset_container_or(const bitset_container_t *src_1, +int bitset_container_or(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. */ -static int bitset_container_or_justcard(const bitset_container_t *src_1, +int bitset_container_or_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the * cardinality. Same as bitset_container_or. */ -static int bitset_container_union(const bitset_container_t *src_1, +int bitset_container_union(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the union of bitsets `src_1' and `src_2' and return the * cardinality. Same as bitset_container_or_justcard. */ -static int bitset_container_union_justcard(const bitset_container_t *src_1, +int bitset_container_union_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not * update the cardinality. Provided to optimize chained operations. */ -static int bitset_container_or_nocard(const bitset_container_t *src_1, +int bitset_container_or_nocard(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and * return the cardinality. */ -static int bitset_container_and(const bitset_container_t *src_1, +int bitset_container_and(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the intersection of bitsets `src_1' and `src_2' and return the * cardinality. */ -static int bitset_container_and_justcard(const bitset_container_t *src_1, +int bitset_container_and_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and * return the cardinality. Same as bitset_container_and. */ -static int bitset_container_intersection(const bitset_container_t *src_1, +int bitset_container_intersection(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the intersection of bitsets `src_1' and `src_2' and return the * cardinality. Same as bitset_container_and_justcard. */ -static int bitset_container_intersection_justcard(const bitset_container_t *src_1, +int bitset_container_intersection_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does * not update the cardinality. Provided to optimize chained operations. 
*/ -static int bitset_container_and_nocard(const bitset_container_t *src_1, +int bitset_container_and_nocard(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and * return the cardinality. */ -static int bitset_container_xor(const bitset_container_t *src_1, +int bitset_container_xor(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the exclusive or of bitsets `src_1' and `src_2' and return the * cardinality. */ -static int bitset_container_xor_justcard(const bitset_container_t *src_1, +int bitset_container_xor_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does * not update the cardinality. Provided to optimize chained operations. */ -static int bitset_container_xor_nocard(const bitset_container_t *src_1, +int bitset_container_xor_nocard(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the * cardinality. */ -static int bitset_container_andnot(const bitset_container_t *src_1, +int bitset_container_andnot(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Computes the and not of bitsets `src_1' and `src_2' and return the * cardinality. */ -static int bitset_container_andnot_justcard(const bitset_container_t *src_1, +int bitset_container_andnot_justcard(const bitset_container_t *src_1, const bitset_container_t *src_2); /* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does * not update the cardinality. Provided to optimize chained operations. */ -static int bitset_container_andnot_nocard(const bitset_container_t *src_1, +int bitset_container_andnot_nocard(const bitset_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); +void bitset_container_offset(const bitset_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset); /* * Write out the 16-bit integers contained in this container as a list of 32-bit * integers using base @@ -2484,20 +2601,20 @@ static int bitset_container_andnot_nocard(const bitset_container_t *src_1, * The out pointer should point to enough memory (the cardinality times 32 * bits). */ -static int bitset_container_to_uint32_array(uint32_t *out, +int bitset_container_to_uint32_array(uint32_t *out, const bitset_container_t *bc, uint32_t base); /* * Print this container using printf (useful for debugging). */ -static void bitset_container_printf(const bitset_container_t *v); +void bitset_container_printf(const bitset_container_t *v); /* * Print this container using printf as a comma-separated list of 32-bit * integers starting at base. */ -static void bitset_container_printf_as_uint32_array(const bitset_container_t *v, +void bitset_container_printf_as_uint32_array(const bitset_container_t *v, uint32_t base); /** @@ -2510,11 +2627,11 @@ static inline int32_t bitset_container_serialized_size_in_bytes(void) { /** * Return the the number of runs. 
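
The bitset-versus-bitset operations declared above are word-by-word loops over the fixed 1024-word (65536-bit) container; the _nocard variants skip the cardinality update so that a chain of operations pays for the popcount only once at the end. A simplified sketch of the OR case (helper names are the editor's; word_popcount stands in for hamming()):

#include <stdint.h>
#include <assert.h>

enum { WORDS = 1024 };

static int word_popcount(uint64_t x) {
    int c = 0;
    for (; x; x &= x - 1) c++;  /* clear-lowest-set-bit loop */
    return c;
}

static int bitset_or_words(const uint64_t *a, const uint64_t *b, uint64_t *out) {
    int card = 0;
    for (int i = 0; i < WORDS; i++) {
        out[i] = a[i] | b[i];
        card += word_popcount(out[i]);
    }
    return card;
}

int main(void) {
    static uint64_t a[WORDS], b[WORDS], out[WORDS];
    a[0] = UINT64_C(0xF0);      /* bits 4..7 */
    b[0] = UINT64_C(0x0F);      /* bits 0..3 */
    b[3] = UINT64_C(1) << 63;   /* bit 255 */
    assert(bitset_or_words(a, b, out) == 9);
    return 0;
}
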
*/ -static int bitset_container_number_of_runs(bitset_container_t *bc); +int bitset_container_number_of_runs(bitset_container_t *bc); -static bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr); -static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, +bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); @@ -2525,7 +2642,7 @@ static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t * The number of bytes written should be * bitset_container_size_in_bytes(container). */ -static int32_t bitset_container_write(const bitset_container_t *container, char *buf); +int32_t bitset_container_write(const bitset_container_t *container, char *buf); /** * Reads the instance from buf, outputs how many bytes were read. @@ -2534,7 +2651,7 @@ static int32_t bitset_container_write(const bitset_container_t *container, char * The number of bytes read should be bitset_container_size_in_bytes(container). * You need to provide the (known) cardinality. */ -static int32_t bitset_container_read(int32_t cardinality, +int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, const char *buf); /** * Return the serialized size in bytes of a container (see @@ -2552,13 +2669,13 @@ static inline int32_t bitset_container_size_in_bytes( /** * Return true if the two containers have the same content. */ -static bool bitset_container_equals(const bitset_container_t *container1, +bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2); /** * Return true if container1 is a subset of container2. */ -static bool bitset_container_is_subset(const bitset_container_t *container1, +bool bitset_container_is_subset(const bitset_container_t *container1, const bitset_container_t *container2); /** @@ -2567,21 +2684,21 @@ static bool bitset_container_is_subset(const bitset_container_t *container1, * accordingly. * Otherwise, it returns false and update start_rank. */ -static bool bitset_container_select(const bitset_container_t *container, +bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element); /* Returns the smallest value (assumes not empty) */ -static uint16_t bitset_container_minimum(const bitset_container_t *container); +uint16_t bitset_container_minimum(const bitset_container_t *container); /* Returns the largest value (assumes not empty) */ -static uint16_t bitset_container_maximum(const bitset_container_t *container); +uint16_t bitset_container_maximum(const bitset_container_t *container); /* Returns the number of values equal or smaller than x */ -static int bitset_container_rank(const bitset_container_t *container, uint16_t x); +int bitset_container_rank(const bitset_container_t *container, uint16_t x); /* Returns the index of the first value equal or larger than x, or -1 */ -static int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); +int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { @@ -2657,22 +2774,22 @@ typedef struct run_container_s run_container_t; #define movable_CAST_run(c) movable_CAST(run_container_t **, c) /* Create a new run container. 
Return NULL in case of failure. */ -static run_container_t *run_container_create(void); +run_container_t *run_container_create(void); /* Create a new run container with given capacity. Return NULL in case of * failure. */ -static run_container_t *run_container_create_given_capacity(int32_t size); +run_container_t *run_container_create_given_capacity(int32_t size); /* * Shrink the capacity to the actual size, return the number of bytes saved. */ -static int run_container_shrink_to_fit(run_container_t *src); +int run_container_shrink_to_fit(run_container_t *src); /* Free memory owned by `run'. */ -static void run_container_free(run_container_t *run); +void run_container_free(run_container_t *run); /* Duplicate container */ -static run_container_t *run_container_clone(const run_container_t *src); +run_container_t *run_container_clone(const run_container_t *src); /* * Effectively deletes the value at index index, repacking data. @@ -2686,7 +2803,7 @@ static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { /** * Good old binary search through rle data */ -static inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, +inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, uint16_t ikey) { int32_t low = 0; int32_t high = lenarray - 1; @@ -2777,7 +2894,7 @@ static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray * existing data needs to be copied over depends on copy. If "copy" is false, * then the new content will be uninitialized, otherwise a copy is made. */ -static void run_container_grow(run_container_t *run, int32_t min, bool copy); +void run_container_grow(run_container_t *run, int32_t min, bool copy); /** * Moves the data so that we can write data at index @@ -2794,7 +2911,7 @@ static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { } /* Add `pos' to `run'. Returns true if `pos' was not present. */ -static bool run_container_add(run_container_t *run, uint16_t pos); +bool run_container_add(run_container_t *run, uint16_t pos); /* Remove `pos' from `run'. Returns true if `pos' was present. */ static inline bool run_container_remove(run_container_t *run, uint16_t pos) { @@ -2834,7 +2951,7 @@ static inline bool run_container_remove(run_container_t *run, uint16_t pos) { } /* Check whether `pos' is present in `run'. */ -static inline bool run_container_contains(const run_container_t *run, uint16_t pos) { +inline bool run_container_contains(const run_container_t *run, uint16_t pos) { int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); if (index >= 0) return true; index = -index - 2; // points to preceding value, possibly -1 @@ -2860,7 +2977,7 @@ static inline bool run_container_contains_range(const run_container_t *run, return false; } } - int32_t i; for (i = index; i < run->n_runs; ++i) { + for (int32_t i = index; i < run->n_runs; ++i) { const uint32_t stop = run->runs[i].value + run->runs[i].length; if (run->runs[i].value >= pos_end) break; if (stop >= pos_end) { @@ -2874,7 +2991,7 @@ static inline bool run_container_contains_range(const run_container_t *run, } /* Get the cardinality of `run'. Requires an actual computation. */ -static int run_container_cardinality(const run_container_t *run); +int run_container_cardinality(const run_container_t *run); /* Card > 0?, see run_container_empty for the reverse */ static inline bool run_container_nonzero_cardinality( @@ -2891,12 +3008,7 @@ static inline bool run_container_empty( /* Copy one container into another. 
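
run_container_contains() and interleavedBinarySearch() above treat the container as sorted (value, length) pairs, where a pair covers the closed interval [value, value + length]; membership is a binary search on value followed by a bounds check against the preceding run. A standalone sketch on a plain array of runs:

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

typedef struct { uint16_t value; uint16_t length; } rle16;

static int32_t search_runs(const rle16 *runs, int32_t n, uint16_t key) {
    int32_t lo = 0, hi = n - 1;
    while (lo <= hi) {
        int32_t mid = (lo + hi) >> 1;
        if (runs[mid].value < key) lo = mid + 1;
        else if (runs[mid].value > key) hi = mid - 1;
        else return mid;
    }
    return -(lo + 1);                  /* not found: -(insertion point) - 1 */
}

static bool run_contains(const rle16 *runs, int32_t n, uint16_t pos) {
    int32_t index = search_runs(runs, n, pos);
    if (index >= 0) return true;       /* pos is the start of a run */
    index = -index - 2;                /* preceding run, possibly -1 */
    return index != -1 &&
           (uint32_t)pos <= (uint32_t)runs[index].value + runs[index].length;
}

int main(void) {
    /* encodes {1,2,3,4} U {10} U {20,...,25} */
    const rle16 runs[] = {{1, 3}, {10, 0}, {20, 5}};
    assert(run_contains(runs, 3, 4));
    assert(run_contains(runs, 3, 10));
    assert(!run_contains(runs, 3, 11));
    assert(run_contains(runs, 3, 25));
    assert(!run_contains(runs, 3, 0));
    return 0;
}
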
We assume that they are distinct. */ -static void run_container_copy(const run_container_t *src, run_container_t *dst); - -/* Set the cardinality to zero (does not release memory). */ -static inline void run_container_clear(run_container_t *run) { - run->n_runs = 0; -} +void run_container_copy(const run_container_t *src, run_container_t *dst); /** * Append run described by vl to the run container, possibly merging. @@ -2978,31 +3090,31 @@ static inline bool run_container_is_full(const run_container_t *run) { /* Compute the union of `src_1' and `src_2' and write the result to `dst' * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -static void run_container_union(const run_container_t *src_1, +void run_container_union(const run_container_t *src_1, const run_container_t *src_2, run_container_t *dst); /* Compute the union of `src_1' and `src_2' and write the result to `src_1' */ -static void run_container_union_inplace(run_container_t *src_1, +void run_container_union_inplace(run_container_t *src_1, const run_container_t *src_2); /* Compute the intersection of src_1 and src_2 and write the result to * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -static void run_container_intersection(const run_container_t *src_1, +void run_container_intersection(const run_container_t *src_1, const run_container_t *src_2, run_container_t *dst); /* Compute the size of the intersection of src_1 and src_2 . */ -static int run_container_intersection_cardinality(const run_container_t *src_1, +int run_container_intersection_cardinality(const run_container_t *src_1, const run_container_t *src_2); /* Check whether src_1 and src_2 intersect. */ -static bool run_container_intersect(const run_container_t *src_1, +bool run_container_intersect(const run_container_t *src_1, const run_container_t *src_2); /* Compute the symmetric difference of `src_1' and `src_2' and write the result * to `dst' * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -static void run_container_xor(const run_container_t *src_1, +void run_container_xor(const run_container_t *src_1, const run_container_t *src_2, run_container_t *dst); /* @@ -3013,19 +3125,19 @@ static void run_container_xor(const run_container_t *src_1, * The function returns the number of values written. * The caller is responsible for allocating enough memory in out. */ -static int run_container_to_uint32_array(void *vout, const run_container_t *cont, +int run_container_to_uint32_array(void *vout, const run_container_t *cont, uint32_t base); /* * Print this container using printf (useful for debugging). */ -static void run_container_printf(const run_container_t *v); +void run_container_printf(const run_container_t *v); /* * Print this container using printf as a comma-separated list of 32-bit * integers starting at base. */ -static void run_container_printf_as_uint32_array(const run_container_t *v, +void run_container_printf_as_uint32_array(const run_container_t *v, uint32_t base); /** @@ -3036,9 +3148,9 @@ static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) { sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries. 
} -static bool run_container_iterate(const run_container_t *cont, uint32_t base, +bool run_container_iterate(const run_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr); -static bool run_container_iterate64(const run_container_t *cont, uint32_t base, +bool run_container_iterate64(const run_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); @@ -3048,7 +3160,7 @@ static bool run_container_iterate64(const run_container_t *cont, uint32_t base, * Roaring. * The number of bytes written should be run_container_size_in_bytes(container). */ -static int32_t run_container_write(const run_container_t *container, char *buf); +int32_t run_container_write(const run_container_t *container, char *buf); /** * Reads the instance from buf, outputs how many bytes were read. @@ -3059,7 +3171,7 @@ static int32_t run_container_write(const run_container_t *container, char *buf); * but * it might be effectively ignored.. */ -static int32_t run_container_read(int32_t cardinality, run_container_t *container, +int32_t run_container_read(int32_t cardinality, run_container_t *container, const char *buf); /** @@ -3074,6 +3186,7 @@ static inline int32_t run_container_size_in_bytes( /** * Return true if the two containers have the same content. */ +ALLOW_UNALIGNED static inline bool run_container_equals(const run_container_t *container1, const run_container_t *container2) { if (container1->n_runs != container2->n_runs) { @@ -3086,14 +3199,14 @@ static inline bool run_container_equals(const run_container_t *container1, /** * Return true if container1 is a subset of container2. */ -static bool run_container_is_subset(const run_container_t *container1, +bool run_container_is_subset(const run_container_t *container1, const run_container_t *container2); /** * Used in a start-finish scan that appends segments, for XOR and NOT */ -static void run_container_smart_append_exclusive(run_container_t *src, +void run_container_smart_append_exclusive(run_container_t *src, const uint16_t start, const uint16_t length); @@ -3122,33 +3235,37 @@ static inline run_container_t *run_container_create_range(uint32_t start, * accordingly. * Otherwise, it returns false and update start_rank. */ -static bool run_container_select(const run_container_t *container, +bool run_container_select(const run_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element); /* Compute the difference of src_1 and src_2 and write the result to * dst. It is assumed that dst is distinct from both src_1 and src_2. 
*/ -static void run_container_andnot(const run_container_t *src_1, +void run_container_andnot(const run_container_t *src_1, const run_container_t *src_2, run_container_t *dst); +void run_container_offset(const run_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset); + /* Returns the smallest value (assumes not empty) */ -static inline uint16_t run_container_minimum(const run_container_t *run) { +inline uint16_t run_container_minimum(const run_container_t *run) { if (run->n_runs == 0) return 0; return run->runs[0].value; } /* Returns the largest value (assumes not empty) */ -static inline uint16_t run_container_maximum(const run_container_t *run) { +inline uint16_t run_container_maximum(const run_container_t *run) { if (run->n_runs == 0) return 0; return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length; } /* Returns the number of values equal or smaller than x */ -static int run_container_rank(const run_container_t *arr, uint16_t x); +int run_container_rank(const run_container_t *arr, uint16_t x); /* Returns the index of the first run containing a value at least as large as x, or -1 */ -static inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) { +inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) { int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x); if (index >= 0) return index; index = -index - 2; // points to preceding run, possibly -1 @@ -3194,14 +3311,15 @@ static inline void run_container_add_range_nruns(run_container_t* run, } /** - * Add all values in range [min, max] + * Add all values in range [min, max]. This function is currently unused + * and left as documentation. */ -static inline void run_container_add_range(run_container_t* run, +/*static inline void run_container_add_range(run_container_t* run, uint32_t min, uint32_t max) { int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); -} +}*/ /** * Shifts last $count elements either left (distance < 0) or right (distance > 0) @@ -3291,31 +3409,31 @@ extern "C" { namespace roaring { namespace internal { /* Convert an array into a bitset. The input container is not freed or modified. */ -static bitset_container_t *bitset_container_from_array(const array_container_t *arr); +bitset_container_t *bitset_container_from_array(const array_container_t *arr); /* Convert a run into a bitset. The input container is not freed or modified. */ -static bitset_container_t *bitset_container_from_run(const run_container_t *arr); +bitset_container_t *bitset_container_from_run(const run_container_t *arr); /* Convert a run into an array. The input container is not freed or modified. */ -static array_container_t *array_container_from_run(const run_container_t *arr); +array_container_t *array_container_from_run(const run_container_t *arr); /* Convert a bitset into an array. The input container is not freed or modified. */ -static array_container_t *array_container_from_bitset(const bitset_container_t *bits); +array_container_t *array_container_from_bitset(const bitset_container_t *bits); /* Convert an array into a run. The input container is not freed or modified. 
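Editor's note: run_container_minimum and run_container_maximum above read directly off the first and last run (start of the first run, end of the last run), and run_container_rank only needs to sum run lengths up to x. A linear-scan sketch of the same arithmetic with toy types; the library's rank presumably goes through the interleaved binary search declared above, which is an assumption here:

#include <stdint.h>
#include <stdio.h>

typedef struct { uint16_t value; uint16_t length; } toy_rle16;

/* minimum is the start of the first run, maximum the end of the last run */
static uint16_t toy_min(const toy_rle16 *r, int n) { return n ? r[0].value : 0; }
static uint16_t toy_max(const toy_rle16 *r, int n) {
    return n ? (uint16_t)(r[n - 1].value + r[n - 1].length) : 0;
}

/* rank(x): number of stored values <= x, summing whole runs before x
 * and the clipped part of the run containing x */
static int toy_rank(const toy_rle16 *r, int n, uint16_t x) {
    int rank = 0;
    for (int i = 0; i < n; i++) {
        uint32_t start = r[i].value, end = start + r[i].length;
        if (x < start) break;
        rank += (x >= end) ? (int)(end - start + 1) : (int)(x - start + 1);
    }
    return rank;
}

int main(void) {
    toy_rle16 runs[] = { {3, 3}, {100, 0}, {200, 1} };
    printf("min=%u max=%u rank(150)=%d\n",
           toy_min(runs, 3), toy_max(runs, 3), toy_rank(runs, 3, 150));
    return 0;
}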
*/ -static run_container_t *run_container_from_array(const array_container_t *c); +run_container_t *run_container_from_array(const array_container_t *c); /* convert a run into either an array or a bitset * might free the container. This does not free the input run container. */ -static container_t *convert_to_bitset_or_array_container( +container_t *convert_to_bitset_or_array_container( run_container_t *rc, int32_t card, uint8_t *resulttype); /* convert containers to and from runcontainers, as is most space efficient. * The container might be freed. */ -static container_t *convert_run_optimize( +container_t *convert_run_optimize( container_t *c, uint8_t typecode_original, uint8_t *typecode_after); @@ -3324,18 +3442,18 @@ static container_t *convert_run_optimize( /* If a conversion occurs, the caller is responsible to free the original * container and * he becomes reponsible to free the new one. */ -static container_t *convert_run_to_efficient_container( +container_t *convert_run_to_efficient_container( run_container_t *c, uint8_t *typecode_after); // like convert_run_to_efficient_container but frees the old result if needed -static container_t *convert_run_to_efficient_container_and_free( +container_t *convert_run_to_efficient_container_and_free( run_container_t *c, uint8_t *typecode_after); /** * Create new container which is a union of run container and * range [min, max]. Caller is responsible for freeing run container. */ -static container_t *container_from_run_range( +container_t *container_from_run_range( const run_container_t *run, uint32_t min, uint32_t max, uint8_t *typecode_after); @@ -3363,18 +3481,18 @@ extern "C" { namespace roaring { namespace internal { /** * Return true if the two containers have the same content. */ -static bool array_container_equal_bitset(const array_container_t* container1, +bool array_container_equal_bitset(const array_container_t* container1, const bitset_container_t* container2); /** * Return true if the two containers have the same content. */ -static bool run_container_equals_array(const run_container_t* container1, +bool run_container_equals_array(const run_container_t* container1, const array_container_t* container2); /** * Return true if the two containers have the same content. */ -static bool run_container_equals_bitset(const run_container_t* container1, +bool run_container_equals_bitset(const run_container_t* container1, const bitset_container_t* container2); #ifdef __cplusplus @@ -3400,31 +3518,31 @@ extern "C" { namespace roaring { namespace internal { /** * Return true if container1 is a subset of container2. */ -static bool array_container_is_subset_bitset(const array_container_t* container1, +bool array_container_is_subset_bitset(const array_container_t* container1, const bitset_container_t* container2); /** * Return true if container1 is a subset of container2. */ -static bool run_container_is_subset_array(const run_container_t* container1, +bool run_container_is_subset_array(const run_container_t* container1, const array_container_t* container2); /** * Return true if container1 is a subset of container2. */ -static bool array_container_is_subset_run(const array_container_t* container1, +bool array_container_is_subset_run(const array_container_t* container1, const run_container_t* container2); /** * Return true if container1 is a subset of container2. 
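Editor's note: the conversion helpers above exist because each container kind wins in a different density regime for one 16-bit chunk: an array costs 2 bytes per value, a bitset is a fixed 65536 bits (8192 bytes), and a run costs 4 bytes per run plus a small header. A hedged sketch of a pick-the-smallest rule; the byte costs mirror the layouts in this header, but the decision code below is illustrative and not the library's convert_run_optimize logic:

#include <stdint.h>
#include <stdio.h>

static size_t array_bytes(uint32_t cardinality) { return 2u * cardinality; }
static size_t bitset_bytes(void)                { return 65536u / 8u; /* 8 KiB */ }
static size_t run_bytes(uint32_t n_runs)        { return 4u * n_runs; }

/* return the cheapest representation for the given statistics */
static const char *cheapest(uint32_t cardinality, uint32_t n_runs) {
    size_t a = array_bytes(cardinality), b = bitset_bytes(), r = run_bytes(n_runs);
    if (r <= a && r <= b) return "run";
    return (a <= b) ? "array" : "bitset";
}

int main(void) {
    printf("%s\n", cheapest(100, 3));      /* sparse, few runs -> run    */
    printf("%s\n", cheapest(3000, 2900));  /* sparse, scattered -> array */
    printf("%s\n", cheapest(50000, 9000)); /* dense -> bitset            */
    return 0;
}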
*/ -static bool run_container_is_subset_bitset(const run_container_t* container1, +bool run_container_is_subset_bitset(const run_container_t* container1, const bitset_container_t* container2); /** * Return true if container1 is a subset of container2. */ -static bool bitset_container_is_subset_run(const bitset_container_t* container1, +bool bitset_container_is_subset_run(const bitset_container_t* container1, const run_container_t* container2); #ifdef __cplusplus @@ -3447,14 +3565,14 @@ extern "C" { namespace roaring { namespace internal { /* Compute the andnot of src_1 and src_2 and write the result to * dst, a valid array container that could be the same as dst.*/ -static void array_bitset_container_andnot(const array_container_t *src_1, +void array_bitset_container_andnot(const array_container_t *src_1, const bitset_container_t *src_2, array_container_t *dst); /* Compute the andnot of src_1 and src_2 and write the result to * src_1 */ -static void array_bitset_container_iandnot(array_container_t *src_1, +void array_bitset_container_iandnot(array_container_t *src_1, const bitset_container_t *src_2); /* Compute the andnot of src_1 and src_2 and write the result to @@ -3462,7 +3580,7 @@ static void array_bitset_container_iandnot(array_container_t *src_1, * Return true for a bitset result; false for array */ -static bool bitset_array_container_andnot( +bool bitset_array_container_andnot( const bitset_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3473,7 +3591,7 @@ static bool bitset_array_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static bool bitset_array_container_iandnot( +bool bitset_array_container_iandnot( bitset_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3484,7 +3602,7 @@ static bool bitset_array_container_iandnot( * result true) or an array container. */ -static bool run_bitset_container_andnot( +bool run_bitset_container_andnot( const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3495,7 +3613,7 @@ static bool run_bitset_container_andnot( * result true) or an array container. */ -static bool run_bitset_container_iandnot( +bool run_bitset_container_iandnot( run_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3506,7 +3624,7 @@ static bool run_bitset_container_iandnot( * result true) or an array container. */ -static bool bitset_run_container_andnot( +bool bitset_run_container_andnot( const bitset_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -3517,7 +3635,7 @@ static bool bitset_run_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static bool bitset_run_container_iandnot( +bool bitset_run_container_iandnot( bitset_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -3525,7 +3643,7 @@ static bool bitset_run_container_iandnot( * can become any type of container. */ -static int run_array_container_andnot( +int run_array_container_andnot( const run_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3536,13 +3654,13 @@ static int run_array_container_andnot( * cases, the caller is responsible for deallocating dst. 
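Editor's note: the andnot family above computes set difference between mixed container types. The simplest case, array minus bitset, keeps the array values whose bit is clear; because the output can only shrink, it can be written in place, which is what the iandnot variants rely on. A toy sketch, not the library code:

#include <stdint.h>
#include <stdio.h>

#define TOY_WORDS (65536 / 64)

/* keep exactly those array values whose bit is NOT set in the bitset;
 * dst may alias arr because the output index never passes the input index */
static int toy_array_bitset_andnot(uint16_t *dst, const uint16_t *arr, int n,
                                   const uint64_t *bits) {
    int out = 0;
    for (int i = 0; i < n; i++) {
        uint64_t word = bits[arr[i] >> 6];
        if (((word >> (arr[i] & 63)) & 1) == 0) dst[out++] = arr[i];
    }
    return out;
}

int main(void) {
    static uint64_t bits[TOY_WORDS];
    bits[0] = 0x06;                       /* bitset = {1, 2} */
    uint16_t arr[] = { 1, 2, 3, 64 };
    int n = toy_array_bitset_andnot(arr, arr, 4, bits);  /* in place */
    printf("%d values survive: %u %u\n", n, arr[0], arr[1]); /* 3 and 64 */
    return 0;
}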
* Returns true iff dst is a bitset */ -static int run_array_container_iandnot( +int run_array_container_iandnot( run_container_t *src_1, const array_container_t *src_2, container_t **dst); /* dst must be a valid array container, allowed to be src_1 */ -static void array_run_container_andnot(const array_container_t *src_1, +void array_run_container_andnot(const array_container_t *src_1, const run_container_t *src_2, array_container_t *dst); @@ -3550,14 +3668,14 @@ static void array_run_container_andnot(const array_container_t *src_1, * can become any kind of container. */ -static void array_run_container_iandnot(array_container_t *src_1, +void array_run_container_iandnot(array_container_t *src_1, const run_container_t *src_2); /* dst does not indicate a valid container initially. Eventually it * can become any kind of container. */ -static int run_run_container_andnot( +int run_run_container_andnot( const run_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -3568,7 +3686,7 @@ static int run_run_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static int run_run_container_iandnot( +int run_run_container_iandnot( run_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -3576,13 +3694,13 @@ static int run_run_container_iandnot( * dst is a valid array container and may be the same as src_1 */ -static void array_array_container_andnot(const array_container_t *src_1, +void array_array_container_andnot(const array_container_t *src_1, const array_container_t *src_2, array_container_t *dst); /* inplace array-array andnot will always be able to reuse the space of * src_1 */ -static void array_array_container_iandnot(array_container_t *src_1, +void array_array_container_iandnot(array_container_t *src_1, const array_container_t *src_2); /* Compute the andnot of src_1 and src_2 and write the result to @@ -3590,7 +3708,7 @@ static void array_array_container_iandnot(array_container_t *src_1, * "dst is a bitset" */ -static bool bitset_bitset_container_andnot( +bool bitset_bitset_container_andnot( const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3601,7 +3719,7 @@ static bool bitset_bitset_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static bool bitset_bitset_container_iandnot( +bool bitset_bitset_container_iandnot( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3633,18 +3751,18 @@ extern "C" { namespace roaring { namespace internal { /* Compute the intersection of src_1 and src_2 and write the result to * dst. It is allowed for dst to be equal to src_1. We assume that dst is a * valid container. */ -static void array_bitset_container_intersection(const array_container_t *src_1, +void array_bitset_container_intersection(const array_container_t *src_1, const bitset_container_t *src_2, array_container_t *dst); /* Compute the size of the intersection of src_1 and src_2. */ -static int array_bitset_container_intersection_cardinality( +int array_bitset_container_intersection_cardinality( const array_container_t *src_1, const bitset_container_t *src_2); /* Checking whether src_1 and src_2 intersect. 
*/ -static bool array_bitset_container_intersect(const array_container_t *src_1, +bool array_bitset_container_intersect(const array_container_t *src_1, const bitset_container_t *src_2); /* @@ -3653,14 +3771,14 @@ static bool array_bitset_container_intersect(const array_container_t *src_1, * otherwise is a array_container_t. We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static bool bitset_bitset_container_intersection(const bitset_container_t *src_1, +bool bitset_bitset_container_intersection(const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); /* Compute the intersection between src_1 and src_2 and write the result to * dst. It is allowed for dst to be equal to src_1. We assume that dst is a * valid container. */ -static void array_run_container_intersection(const array_container_t *src_1, +void array_run_container_intersection(const array_container_t *src_1, const run_container_t *src_2, array_container_t *dst); @@ -3669,27 +3787,27 @@ static void array_run_container_intersection(const array_container_t *src_1, * otherwise is a array_container_t. * If *dst == src_2, then an in-place intersection is attempted **/ -static bool run_bitset_container_intersection(const run_container_t *src_1, +bool run_bitset_container_intersection(const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst); /* Compute the size of the intersection between src_1 and src_2 . */ -static int array_run_container_intersection_cardinality(const array_container_t *src_1, +int array_run_container_intersection_cardinality(const array_container_t *src_1, const run_container_t *src_2); /* Compute the size of the intersection between src_1 and src_2 **/ -static int run_bitset_container_intersection_cardinality(const run_container_t *src_1, +int run_bitset_container_intersection_cardinality(const run_container_t *src_1, const bitset_container_t *src_2); /* Check that src_1 and src_2 intersect. */ -static bool array_run_container_intersect(const array_container_t *src_1, +bool array_run_container_intersect(const array_container_t *src_1, const run_container_t *src_2); /* Check that src_1 and src_2 intersect. **/ -static bool run_bitset_container_intersect(const run_container_t *src_1, +bool run_bitset_container_intersect(const run_container_t *src_1, const bitset_container_t *src_2); /* @@ -3700,7 +3818,7 @@ static bool run_bitset_container_intersect(const run_container_t *src_1, * to free the container. * In all cases, the result is in *dst. */ -static bool bitset_bitset_container_intersection_inplace( +bool bitset_bitset_container_intersection_inplace( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3731,7 +3849,7 @@ extern "C" { namespace roaring { namespace internal { * We assume that dst is pre-allocated and a valid bitset container * There can be no in-place version. */ -static void array_container_negation(const array_container_t *src, +void array_container_negation(const array_container_t *src, bitset_container_t *dst); /* Negation across the entire range of the container @@ -3741,7 +3859,7 @@ static void array_container_negation(const array_container_t *src, * We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static bool bitset_container_negation( +bool bitset_container_negation( const bitset_container_t *src, container_t **dst); @@ -3754,7 +3872,7 @@ static bool bitset_container_negation( * to free the container. * In all cases, the result is in *dst. 
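Editor's note: bitset_bitset_container_intersection is essentially a word-wise AND plus a population count, and the resulting cardinality decides whether the result stays a bitset or is converted down to an array (the DEFAULT_MAX_SIZE threshold that also appears in container_remove_range further down; 4096 below is the value commonly used by CRoaring but is an assumption here). A sketch using the GCC/Clang popcount builtin:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_WORDS (65536 / 64)   /* one 16-bit chunk as a 1024-word bitset */

/* AND two fixed-size bitsets word by word and return the result cardinality */
static int toy_bitset_and(uint64_t *dst, const uint64_t *a, const uint64_t *b) {
    int card = 0;
    for (int i = 0; i < TOY_WORDS; i++) {
        dst[i] = a[i] & b[i];
        card += __builtin_popcountll(dst[i]);
    }
    return card;
}

int main(void) {
    static uint64_t a[TOY_WORDS], b[TOY_WORDS], out[TOY_WORDS];
    memset(a, 0xFF, sizeof(a));           /* a = everything */
    b[0] = 0x0F;                          /* b = {0, 1, 2, 3} */
    int card = toy_bitset_and(out, a, b);
    printf("cardinality=%d -> %s container\n",
           card, card <= 4096 ? "array" : "bitset");
    return 0;
}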
*/ -static bool bitset_container_negation_inplace( +bool bitset_container_negation_inplace( bitset_container_t *src, container_t **dst); @@ -3765,7 +3883,7 @@ static bool bitset_container_negation_inplace( * We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static int run_container_negation(const run_container_t *src, container_t **dst); +int run_container_negation(const run_container_t *src, container_t **dst); /* * Same as run_container_negation except that if the output is to @@ -3774,14 +3892,14 @@ static int run_container_negation(const run_container_t *src, container_t **dst) * then src is modified and no allocation is made. * In all cases, the result is in *dst. */ -static int run_container_negation_inplace(run_container_t *src, container_t **dst); +int run_container_negation_inplace(run_container_t *src, container_t **dst); /* Negation across a range of the container. * Compute the negation of src and write the result * to *dst. Returns true if the result is a bitset container * and false for an array container. *dst is not preallocated. */ -static bool array_container_negation_range( +bool array_container_negation_range( const array_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3790,7 +3908,7 @@ static bool array_container_negation_range( * inplace version without inefficient copying. Thus this routine * may be a wrapper for the non-in-place version */ -static bool array_container_negation_range_inplace( +bool array_container_negation_range_inplace( array_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3802,7 +3920,7 @@ static bool array_container_negation_range_inplace( * We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static bool bitset_container_negation_range( +bool bitset_container_negation_range( const bitset_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3816,7 +3934,7 @@ static bool bitset_container_negation_range( * to free the container. * In all cases, the result is in *dst. */ -static bool bitset_container_negation_range_inplace( +bool bitset_container_negation_range_inplace( bitset_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3827,7 +3945,7 @@ static bool bitset_container_negation_range_inplace( * We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static int run_container_negation_range( +int run_container_negation_range( const run_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3839,7 +3957,7 @@ static int run_container_negation_range( * then src is modified and no allocation is made. * In all cases, the result is in *dst. */ -static int run_container_negation_range_inplace( +int run_container_negation_range_inplace( run_container_t *src, const int range_start, const int range_end, container_t **dst); @@ -3871,14 +3989,14 @@ extern "C" { namespace roaring { namespace internal { /* Compute the union of src_1 and src_2 and write the result to * dst. It is allowed for src_2 to be dst. */ -static void array_bitset_container_union(const array_container_t *src_1, +void array_bitset_container_union(const array_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Compute the union of src_1 and src_2 and write the result to * dst. It is allowed for src_2 to be dst. 
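Editor's note: array_bitset_container_union just sets one bit per array value in (a copy of) the bitset; only the cardinality bookkeeping distinguishes it from the lazy variant declared right after it. A toy sketch with plain buffers:

#include <stdint.h>
#include <stdio.h>

#define TOY_WORDS (65536 / 64)

/* set one bit per array value, counting only the bits that were not set yet */
static int toy_array_bitset_union(uint64_t *bits, int card,
                                  const uint16_t *arr, int n) {
    for (int i = 0; i < n; i++) {
        uint64_t mask = UINT64_C(1) << (arr[i] & 63);
        uint64_t word = bits[arr[i] >> 6];
        card += (word & mask) == 0;
        bits[arr[i] >> 6] = word | mask;
    }
    return card;
}

int main(void) {
    static uint64_t bits[TOY_WORDS];
    uint16_t arr[] = { 1, 2, 65, 65 };
    int card = toy_array_bitset_union(bits, 0, arr, 4);
    printf("cardinality after union = %d\n", card); /* 3 */
    return 0;
}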
This version does not * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -static void array_bitset_container_lazy_union(const array_container_t *src_1, +void array_bitset_container_lazy_union(const array_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); @@ -3888,7 +4006,7 @@ static void array_bitset_container_lazy_union(const array_container_t *src_1, * otherwise is a array_container_t. We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -static bool array_array_container_union( +bool array_array_container_union( const array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3900,7 +4018,7 @@ static bool array_array_container_union( * it either written to src_1 (if *dst is null) or to *dst. * If the result is a bitset_container_t and *dst is null, then there was a failure. */ -static bool array_array_container_inplace_union( +bool array_array_container_inplace_union( array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3908,7 +4026,7 @@ static bool array_array_container_inplace_union( * Same as array_array_container_union except that it will more eagerly produce * a bitset. */ -static bool array_array_container_lazy_union( +bool array_array_container_lazy_union( const array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3916,7 +4034,7 @@ static bool array_array_container_lazy_union( * Same as array_array_container_inplace_union except that it will more eagerly produce * a bitset. */ -static bool array_array_container_lazy_inplace_union( +bool array_array_container_lazy_inplace_union( array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -3925,7 +4043,7 @@ static bool array_array_container_lazy_inplace_union( * valid container. The result might need to be further converted to array or * bitset container, * the caller is responsible for the eventual conversion. */ -static void array_run_container_union(const array_container_t *src_1, +void array_run_container_union(const array_container_t *src_1, const run_container_t *src_2, run_container_t *dst); @@ -3933,7 +4051,7 @@ static void array_run_container_union(const array_container_t *src_1, * src2. The result might need to be further converted to array or * bitset container, * the caller is responsible for the eventual conversion. */ -static void array_run_container_inplace_union(const array_container_t *src_1, +void array_run_container_inplace_union(const array_container_t *src_1, run_container_t *src_2); /* Compute the union of src_1 and src_2 and write the result to @@ -3941,7 +4059,7 @@ static void array_run_container_inplace_union(const array_container_t *src_1, * If run_container_is_full(src_1) is true, you must not be calling this *function. **/ -static void run_bitset_container_union(const run_container_t *src_1, +void run_bitset_container_union(const run_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); @@ -3951,7 +4069,7 @@ static void run_bitset_container_union(const run_container_t *src_1, * If run_container_is_full(src_1) is true, you must not be calling this * function. 
* */ -static void run_bitset_container_lazy_union(const run_container_t *src_1, +void run_bitset_container_lazy_union(const run_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); @@ -3990,7 +4108,7 @@ extern "C" { namespace roaring { namespace internal { /* Compute the xor of src_1 and src_2 and write the result to * dst (which has no container initially). * Result is true iff dst is a bitset */ -static bool array_bitset_container_xor( +bool array_bitset_container_xor( const array_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -3999,7 +4117,7 @@ static bool array_bitset_container_xor( * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -static void array_bitset_container_lazy_xor(const array_container_t *src_1, +void array_bitset_container_lazy_xor(const array_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); /* Compute the xor of src_1 and src_2 and write the result to @@ -4007,7 +4125,7 @@ static void array_bitset_container_lazy_xor(const array_container_t *src_1, * "dst is a bitset" */ -static bool bitset_bitset_container_xor( +bool bitset_bitset_container_xor( const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -4018,7 +4136,7 @@ static bool bitset_bitset_container_xor( * result true) or an array container. */ -static bool run_bitset_container_xor( +bool run_bitset_container_xor( const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -4027,7 +4145,7 @@ static bool run_bitset_container_xor( * cardinality would dictate an array container. */ -static void run_bitset_container_lazy_xor(const run_container_t *src_1, +void run_bitset_container_lazy_xor(const run_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst); @@ -4035,7 +4153,7 @@ static void run_bitset_container_lazy_xor(const run_container_t *src_1, * can become any kind of container. */ -static int array_run_container_xor( +int array_run_container_xor( const array_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -4043,7 +4161,7 @@ static int array_run_container_xor( * an array or a bitset container, indicated by return code */ -static bool array_array_container_xor( +bool array_array_container_xor( const array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -4053,7 +4171,7 @@ static bool array_array_container_xor( * container type might not be correct for the actual cardinality */ -static bool array_array_container_lazy_xor( +bool array_array_container_lazy_xor( const array_container_t *src_1, const array_container_t *src_2, container_t **dst); @@ -4062,7 +4180,7 @@ static bool array_array_container_lazy_xor( * smaller. */ -static void array_run_container_lazy_xor(const array_container_t *src_1, +void array_run_container_lazy_xor(const array_container_t *src_1, const run_container_t *src_2, run_container_t *dst); @@ -4070,7 +4188,7 @@ static void array_run_container_lazy_xor(const array_container_t *src_1, * can become any kind of container. */ -static int run_run_container_xor( +int run_run_container_xor( const run_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -4085,15 +4203,15 @@ static int run_run_container_xor( * cases, the caller is responsible for deallocating dst. 
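Editor's note: the lazy union/xor variants above deliberately leave the destination's cardinality unset (BITSET_UNKNOWN_CARDINALITY) so a chain of operations pays for population counts only once; that the caller runs a repair pass at the end is inferred from these comments rather than spelled out in this hunk. A toy sketch of that pattern:

#include <stdint.h>
#include <stdio.h>

#define TOY_WORDS (65536 / 64)
#define TOY_UNKNOWN_CARDINALITY (-1)   /* stand-in for BITSET_UNKNOWN_CARDINALITY */

typedef struct { uint64_t words[TOY_WORDS]; int cardinality; } toy_bitset;

/* "lazy" xor: combine the words but leave the cardinality unknown */
static void toy_lazy_xor(toy_bitset *dst, const toy_bitset *a, const toy_bitset *b) {
    for (int i = 0; i < TOY_WORDS; i++) dst->words[i] = a->words[i] ^ b->words[i];
    dst->cardinality = TOY_UNKNOWN_CARDINALITY;
}

/* the repair step a caller runs once after the lazy chain */
static void toy_repair_cardinality(toy_bitset *bs) {
    int card = 0;
    for (int i = 0; i < TOY_WORDS; i++) card += __builtin_popcountll(bs->words[i]);
    bs->cardinality = card;
}

int main(void) {
    static toy_bitset a, b, out;
    a.words[0] = 0xFF; b.words[0] = 0x0F;
    toy_lazy_xor(&out, &a, &b);
    toy_repair_cardinality(&out);
    printf("cardinality = %d\n", out.cardinality);   /* 4 */
    return 0;
}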
* Returns true iff dst is a bitset */ -static bool bitset_array_container_ixor( +bool bitset_array_container_ixor( bitset_container_t *src_1, const array_container_t *src_2, container_t **dst); -static bool bitset_bitset_container_ixor( +bool bitset_bitset_container_ixor( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst); -static bool array_bitset_container_ixor( +bool array_bitset_container_ixor( array_container_t *src_1, const bitset_container_t *src_2, container_t **dst); @@ -4104,11 +4222,11 @@ static bool array_bitset_container_ixor( * result true) or an array container. */ -static bool run_bitset_container_ixor( +bool run_bitset_container_ixor( run_container_t *src_1, const bitset_container_t *src_2, container_t **dst); -static bool bitset_run_container_ixor( +bool bitset_run_container_ixor( bitset_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -4116,19 +4234,19 @@ static bool bitset_run_container_ixor( * can become any kind of container. */ -static int array_run_container_ixor( +int array_run_container_ixor( array_container_t *src_1, const run_container_t *src_2, container_t **dst); -static int run_array_container_ixor( +int run_array_container_ixor( run_container_t *src_1, const array_container_t *src_2, container_t **dst); -static bool array_array_container_ixor( +bool array_array_container_ixor( array_container_t *src_1, const array_container_t *src_2, container_t **dst); -static int run_run_container_ixor( +int run_run_container_ixor( run_container_t *src_1, const run_container_t *src_2, container_t **dst); @@ -4204,18 +4322,18 @@ typedef struct shared_container_s shared_container_t; * If copy_on_write = false, then clone. * Return NULL in case of failure. **/ -static container_t *get_copy_of_container(container_t *container, uint8_t *typecode, +container_t *get_copy_of_container(container_t *container, uint8_t *typecode, bool copy_on_write); /* Frees a shared container (actually decrement its counter and only frees when * the counter falls to zero). */ -static void shared_container_free(shared_container_t *container); +void shared_container_free(shared_container_t *container); /* extract a copy from the shared container, freeing the shared container if there is just one instance left, clone instances when the counter is higher than one */ -static container_t *shared_container_extract_copy(shared_container_t *container, +container_t *shared_container_extract_copy(shared_container_t *container, uint8_t *typecode); /* access to container underneath */ @@ -4261,7 +4379,7 @@ static inline uint8_t get_container_type( * is responsible for deallocation. If the container is not shared, then it is * physically cloned. Sharable containers are not cloneable. 
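Editor's note: get_copy_of_container, shared_container_free and shared_container_extract_copy implement copy-on-write: a copy only bumps a reference counter, and a real clone happens only when a writable version is needed while other owners remain. A self-contained toy version of that idea (not the library's shared_container_t):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uint32_t refcount; size_t len; uint8_t data[]; } toy_shared;

static toy_shared *toy_shared_new(const uint8_t *src, size_t len) {
    toy_shared *s = malloc(sizeof(*s) + len);
    if (!s) return NULL;
    s->refcount = 1; s->len = len;
    memcpy(s->data, src, len);
    return s;
}
static toy_shared *toy_shared_copy(toy_shared *s) { s->refcount++; return s; }
static void toy_shared_free(toy_shared *s) {
    if (s && --s->refcount == 0) free(s);
}
/* reuse the payload when we are the only owner, otherwise clone it and
 * drop our reference to the shared one */
static toy_shared *toy_shared_writable(toy_shared *s) {
    if (s->refcount == 1) return s;
    toy_shared *clone = toy_shared_new(s->data, s->len);
    toy_shared_free(s);
    return clone;
}

int main(void) {
    uint8_t payload[4] = { 1, 2, 3, 4 };
    toy_shared *a = toy_shared_new(payload, sizeof(payload));
    toy_shared *b = toy_shared_copy(a);        /* cheap: refcount = 2 */
    b = toy_shared_writable(b);                /* forces a real clone */
    b->data[0] = 9;
    printf("a=%d b=%d\n", a->data[0], b->data[0]); /* 1 9 */
    toy_shared_free(a);
    toy_shared_free(b);
    return 0;
}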
*/ -static container_t *container_clone(const container_t *container, uint8_t typecode); +container_t *container_clone(const container_t *container, uint8_t typecode); /* access to container underneath, cloning it if needed */ static inline container_t *get_writable_copy_if_shared( @@ -4311,7 +4429,7 @@ static inline bitset_container_t *container_to_bitset( * Get the container name from the typecode * (unused at time of writing) */ -static inline const char *get_container_name(uint8_t typecode) { +/*static inline const char *get_container_name(uint8_t typecode) { switch (typecode) { case BITSET_CONTAINER_TYPE: return container_names[0]; @@ -4326,7 +4444,7 @@ static inline const char *get_container_name(uint8_t typecode) { __builtin_unreachable(); return "unknown"; } -} +}*/ static inline const char *get_full_container_name( const container_t *c, uint8_t typecode @@ -4555,13 +4673,13 @@ static inline int32_t container_size_in_bytes( /** * print the container (useful for debugging), requires a typecode */ -static void container_printf(const container_t *container, uint8_t typecode); +void container_printf(const container_t *container, uint8_t typecode); /** * print the content of the container as a comma-separated list of 32-bit values * starting at base, requires a typecode */ -static void container_printf_as_uint32_array(const container_t *container, +void container_printf_as_uint32_array(const container_t *container, uint8_t typecode, uint32_t base); /** @@ -4588,7 +4706,7 @@ static inline bool container_nonzero_cardinality( /** * Recover memory from a container, requires a typecode */ -static void container_free(container_t *container, uint8_t typecode); +void container_free(container_t *container, uint8_t typecode); /** * Convert a container to an array of values, requires a typecode as well as a @@ -5308,7 +5426,7 @@ static inline container_t *container_lazy_or( CAST_run(result)); *result_type = RUN_CONTAINER_TYPE; // we are being lazy - result = convert_run_to_efficient_container( + result = convert_run_to_efficient_container_and_free( CAST_run(result), result_type); return result; @@ -5713,6 +5831,43 @@ static inline container_t* container_xor( } } +/* Applies an offset to the non-empty container 'c'. + * The results are stored in new containers returned via 'lo' and 'hi', for the + * low and high halves of the result (where the low half matches the original key + * and the high one corresponds to values for the following key). + * Either one of 'lo' and 'hi' are allowed to be 'NULL', but not both. + * Whenever one of them is not 'NULL', it should point to a 'NULL' container. + * Whenever one of them is 'NULL' the shifted elements for that part will not be + * computed. + * If either of the resulting containers turns out to be empty, the pointed + * container will remain 'NULL'. 
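Editor's note: container_add_offset, added in this update, splits the shifted values between a low half (same 16-bit key) and a high half (the following key), because adding a 16-bit offset can overflow the container's 16-bit value range. A toy illustration of which side each value lands on:

#include <stdint.h>
#include <stdio.h>

static void toy_offset_split(const uint16_t *vals, int n, uint16_t offset) {
    for (int i = 0; i < n; i++) {
        uint32_t shifted = (uint32_t)vals[i] + offset;
        if (shifted < 65536)
            printf("lo: %u\n", shifted);            /* stays under the same key */
        else
            printf("hi: %u\n", shifted - 65536);    /* spills into key + 1 */
    }
}

int main(void) {
    uint16_t vals[] = { 10, 60000, 65535 };
    toy_offset_split(vals, 3, 10000);   /* 10 -> lo; 60000 and 65535 -> hi */
    return 0;
}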
+ */ +static inline void container_add_offset(const container_t *c, uint8_t type, + container_t **lo, container_t **hi, + uint16_t offset) { + assert(offset != 0); + assert(container_nonzero_cardinality(c, type)); + assert(lo != NULL || hi != NULL); + assert(lo == NULL || *lo == NULL); + assert(hi == NULL || *hi == NULL); + + switch (type) { + case BITSET_CONTAINER_TYPE: + bitset_container_offset(const_CAST_bitset(c), lo, hi, offset); + break; + case ARRAY_CONTAINER_TYPE: + array_container_offset(const_CAST_array(c), lo, hi, offset); + break; + case RUN_CONTAINER_TYPE: + run_container_offset(const_CAST_run(c), lo, hi, offset); + break; + default: + assert(false); + __builtin_unreachable(); + break; + } +} + /** * Compute xor between two containers, generate a new container (having type * result_type), requires a typecode. This allocates new memory, caller @@ -6522,7 +6677,7 @@ static inline container_t *container_remove_range( if (result_cardinality == 0) { return NULL; - } else if (result_cardinality < DEFAULT_MAX_SIZE) { + } else if (result_cardinality <= DEFAULT_MAX_SIZE) { *result_type = ARRAY_CONTAINER_TYPE; bitset_reset_range(bitset->words, min, max+1); bitset->cardinality = result_cardinality; @@ -6561,15 +6716,7 @@ static inline container_t *container_remove_range( } run_container_remove_range(run, min, max); - - if (run_container_serialized_size_in_bytes(run->n_runs) <= - bitset_container_serialized_size_in_bytes()) { - *result_type = RUN_CONTAINER_TYPE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE; - return bitset_container_from_run(run); - } + return convert_run_to_efficient_container(run, result_type); } default: __builtin_unreachable(); @@ -6610,55 +6757,55 @@ enum { /** * Create a new roaring array */ -static roaring_array_t *ra_create(void); +roaring_array_t *ra_create(void); /** * Initialize an existing roaring array with the specified capacity (in number * of containers) */ -static bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); /** * Initialize with zero capacity */ -static void ra_init(roaring_array_t *t); +void ra_init(roaring_array_t *t); /** * Copies this roaring array, we assume that dest is not initialized */ -static bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, +bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, bool copy_on_write); /* * Shrinks the capacity, returns the number of bytes saved. 
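Editor's note: the roaring_array_t used by the ra_* declarations below maps a 16-bit key to one container: the key is the upper half of each 32-bit value and the container stores the lower half, which is why ra_get_index and friends take uint16_t keys. A short demonstration of that split:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t values[] = { 5, 65536, 65541, 4000000000u };
    for (int i = 0; i < 4; i++) {
        uint16_t key = (uint16_t)(values[i] >> 16);   /* which container */
        uint16_t low = (uint16_t)(values[i] & 0xFFFF); /* value inside it */
        printf("%u -> key=%u low=%u\n", values[i], (unsigned)key, (unsigned)low);
    }
    return 0;
}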
*/ -static int ra_shrink_to_fit(roaring_array_t *ra); +int ra_shrink_to_fit(roaring_array_t *ra); /** * Copies this roaring array, we assume that dest is initialized */ -static bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, bool copy_on_write); /** * Frees the memory used by a roaring array */ -static void ra_clear(roaring_array_t *r); +void ra_clear(roaring_array_t *r); /** * Frees the memory used by a roaring array, but does not free the containers */ -static void ra_clear_without_containers(roaring_array_t *r); +void ra_clear_without_containers(roaring_array_t *r); /** * Frees just the containers */ -static void ra_clear_containers(roaring_array_t *ra); +void ra_clear_containers(roaring_array_t *ra); /** * Get the index corresponding to a 16-bit key */ -static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { +inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; return binarySearch(ra->keys, (int32_t)ra->size, x); } @@ -6666,7 +6813,7 @@ static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { /** * Retrieves the container at index i, filling in the typecode */ -static inline container_t *ra_get_container_at_index( +inline container_t *ra_get_container_at_index( const roaring_array_t *ra, uint16_t i, uint8_t *typecode ){ *typecode = ra->typecodes[i]; @@ -6676,19 +6823,21 @@ static inline container_t *ra_get_container_at_index( /** * Retrieves the key at index i */ -static uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); +inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { + return ra->keys[i]; +} /** * Add a new key-value pair at index i */ -static void ra_insert_new_key_value_at( +void ra_insert_new_key_value_at( roaring_array_t *ra, int32_t i, uint16_t key, container_t *c, uint8_t typecode); /** * Append a new key-value pair */ -static void ra_append( +void ra_append( roaring_array_t *ra, uint16_t key, container_t *c, uint8_t typecode); @@ -6696,7 +6845,7 @@ static void ra_append( * Append a new key-value pair to ra, cloning (in COW sense) a value from sa * at index index */ -static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, uint16_t index, bool copy_on_write); /** @@ -6704,21 +6853,21 @@ static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, * at indexes * [start_index, end_index) */ -static void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, int32_t start_index, int32_t end_index, bool copy_on_write); /** appends from sa to ra, ending with the greatest key that is * is less or equal stopping_key */ -static void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, uint16_t stopping_key, bool copy_on_write); /** appends from sa to ra, starting with the smallest key that is * is strictly greater than before_start */ -static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, uint16_t before_start, bool copy_on_write); /** @@ -6726,13 +6875,13 @@ static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t 
*s * [start_index, end_index), old array should not be freed * (use ra_clear_without_containers) **/ -static void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, int32_t start_index, int32_t end_index); /** * Append new key-value pairs to ra, from sa at indexes * [start_index, end_index) */ -static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, int32_t start_index, int32_t end_index, bool copy_on_write); @@ -6740,7 +6889,7 @@ static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, * Set the container at the corresponding index using the specified * typecode. */ -static inline void ra_set_container_at_index( +inline void ra_set_container_at_index( const roaring_array_t *ra, int32_t i, container_t *c, uint8_t typecode ){ @@ -6754,20 +6903,20 @@ static inline void ra_set_container_at_index( * (at * least); */ -static bool extend_array(roaring_array_t *ra, int32_t k); +bool extend_array(roaring_array_t *ra, int32_t k); -static inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } +inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, int32_t pos) { return advanceUntil(ra->keys, pos, ra->size, x); } -static int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); -static void ra_downsize(roaring_array_t *ra, int32_t new_length); +void ra_downsize(roaring_array_t *ra, int32_t new_length); -static inline void ra_replace_key_and_container_at_index( +inline void ra_replace_key_and_container_at_index( roaring_array_t *ra, int32_t i, uint16_t key, container_t *c, uint8_t typecode ){ @@ -6779,9 +6928,9 @@ static inline void ra_replace_key_and_container_at_index( } // write set bits to an array -static void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); -static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); /** * write a bitmap to a buffer. This is meant to be compatible with @@ -6789,7 +6938,7 @@ static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size * Java and Go versions. Return the size in bytes of the serialized * output (which should be ra_portable_size_in_bytes(ra)). */ -static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); +size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); /** * read a bitmap from a serialized version. This is meant to be compatible @@ -6799,7 +6948,7 @@ static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); * and *readbytes indicates how many bytes were read. In all cases, if the function * returns true, then maxbytes >= *readbytes. 
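Editor's note: ra_get_index and the other lookups here follow the usual convention of binarySearch in this code base: a hit returns the index, a miss returns -(insertion point) - 1, which is why callers such as ra_append_copies_after recover the slot with -result - 1. A toy search with that return convention (my own code, not the library's binarySearch):

#include <stdint.h>
#include <stdio.h>

static int32_t toy_binary_search(const uint16_t *keys, int32_t n, uint16_t x) {
    int32_t lo = 0, hi = n - 1;
    while (lo <= hi) {
        int32_t mid = lo + (hi - lo) / 2;
        if (keys[mid] < x) lo = mid + 1;
        else if (keys[mid] > x) hi = mid - 1;
        else return mid;                 /* found: plain index */
    }
    return -(lo + 1);                    /* not found: -(insertion point) - 1 */
}

int main(void) {
    uint16_t keys[] = { 2, 7, 9 };
    printf("%d\n", toy_binary_search(keys, 3, 7));  /* 1: found        */
    printf("%d\n", toy_binary_search(keys, 3, 8));  /* -3: insert at 2 */
    return 0;
}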
*/ -static bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); +bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); /** * Quickly checks whether there is a serialized bitmap at the pointer, @@ -6809,25 +6958,25 @@ static bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const * This function returns 0 if and only if no valid bitmap is found. * Otherwise, it returns how many bytes are occupied by the bitmap data. */ -static size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); /** * How many bytes are required to serialize this bitmap (meant to be * compatible * with Java and Go versions) */ -static size_t ra_portable_size_in_bytes(const roaring_array_t *ra); +size_t ra_portable_size_in_bytes(const roaring_array_t *ra); /** * return true if it contains at least one run container. */ -static bool ra_has_run_container(const roaring_array_t *ra); +bool ra_has_run_container(const roaring_array_t *ra); /** * Size of the header when serializing (meant to be compatible * with Java and Go versions) */ -static uint32_t ra_portable_header_size(const roaring_array_t *ra); +uint32_t ra_portable_header_size(const roaring_array_t *ra); /** * If the container at the index i is share, unshare it (creating a local @@ -6843,18 +6992,18 @@ static inline void ra_unshare_container_at_index(roaring_array_t *ra, /** * remove at index i, sliding over all entries after i */ -static void ra_remove_at_index(roaring_array_t *ra, int32_t i); +void ra_remove_at_index(roaring_array_t *ra, int32_t i); /** * clears all containers, sets the size at 0 and shrinks the memory usage. */ -static void ra_reset(roaring_array_t *ra); +void ra_reset(roaring_array_t *ra); /** * remove at index i, sliding over all entries after i. Free removed container. */ -static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); /** * remove a chunk of indices, sliding over entries after it @@ -6865,7 +7014,7 @@ static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); // the mutated RoaringBitmap that are after the largest container of // the argument RoaringBitmap. It is followed by a call to resize. // -static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, uint32_t new_begin); /** @@ -6875,7 +7024,7 @@ static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, * This function doesn't free or create new containers. * Caller is responsible for that. 
*/ -static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); #ifdef __cplusplus } // namespace internal @@ -6884,4378 +7033,6 @@ static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); #endif /* end file include/roaring/roaring_array.h */ -/* begin file include/roaring/misc/configreport.h */ -/* - * configreport.h - * - */ - -#ifndef INCLUDE_MISC_CONFIGREPORT_H_ -#define INCLUDE_MISC_CONFIGREPORT_H_ - -#include <stddef.h> // for size_t -#include <stdint.h> -#include <stdio.h> - - -#ifdef __cplusplus -extern "C" { namespace roaring { namespace misc { -#endif - -#ifdef CROARING_IS_X64 -// useful for basic info (0) -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) { -#ifdef ROARING_INLINE_ASM - __asm volatile("cpuid" - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) - : "0"(*eax), "2"(*ecx)); -#endif /* not sure what to do when static inline assembly is unavailable*/ -} - -// CPUID instruction takes no parameters as CPUID implicitly uses the EAX -// register. -// The EAX register should be loaded with a value specifying what information to -// return -static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) { -#ifdef ROARING_INLINE_ASM - __asm__ volatile("cpuid;" // call cpuid instruction - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), - "=d"(*edx) // output equal to "movl %%eax %1" - : "a"(code) // input equal to "movl %1, %%eax" - //:"%eax","%ebx","%ecx","%edx"// clobbered register - ); -#endif /* not sure what to do when static inline assembly is unavailable*/ -} - -static inline int computecacheline() { - int eax = 0, ebx = 0, ecx = 0, edx = 0; - cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx); - return ecx & 0xFF; -} - -// this is quite imperfect, but can be handy -static inline const char *guessprocessor() { - unsigned eax = 1, ebx = 0, ecx = 0, edx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - const char *codename; - switch (eax >> 4) { - case 0x506E: - codename = "Skylake"; - break; - case 0x406C: - codename = "CherryTrail"; - break; - case 0x306D: - codename = "Broadwell"; - break; - case 0x306C: - codename = "Haswell"; - break; - case 0x306A: - codename = "IvyBridge"; - break; - case 0x206A: - case 0x206D: - codename = "SandyBridge"; - break; - case 0x2065: - case 0x206C: - case 0x206F: - codename = "Westmere"; - break; - case 0x106E: - case 0x106A: - case 0x206E: - codename = "Nehalem"; - break; - case 0x1067: - case 0x106D: - codename = "Penryn"; - break; - case 0x006F: - case 0x1066: - codename = "Merom"; - break; - case 0x0066: - codename = "Presler"; - break; - case 0x0063: - case 0x0064: - codename = "Prescott"; - break; - case 0x006D: - codename = "Dothan"; - break; - case 0x0366: - codename = "Cedarview"; - break; - case 0x0266: - codename = "Lincroft"; - break; - case 0x016C: - codename = "Pineview"; - break; - default: - codename = "UNKNOWN"; - break; - } - return codename; -} - -static inline void tellmeall() { - printf("x64 processor: %s\t", guessprocessor()); - -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - uint32_t config = croaring_detect_supported_architectures(); - if((config & CROARING_NEON) == CROARING_NEON) { - printf(" NEON detected\t"); - } - #ifdef __AVX2__ - printf(" Building for AVX2\t"); - #endif - if(croaring_avx2()) { - printf( "AVX2 usable\t"); - } - if((config & CROARING_AVX2) == CROARING_AVX2) { - printf( "AVX2 
detected\t"); - if(!croaring_avx2()) { - printf( "AVX2 not used\t"); - } - } - if((config & CROARING_SSE42) == CROARING_SSE42) { - printf(" SSE4.2 detected\t"); - } - if((config & CROARING_BMI1) == CROARING_BMI1) { - printf(" BMI1 detected\t"); - } - if((config & CROARING_BMI2) == CROARING_BMI2) { - printf(" BMI2 detected\t"); - } - printf("\n"); - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#ifdef __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#ifdef __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#ifdef __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif - if (computecacheline() != 64) - printf("cache line: %d bytes\n", computecacheline()); -} -#else - -static inline void tellmeall() { - printf("Non-X64 processor\n"); -#ifdef __arm__ - printf("ARM processor detected\n"); -#endif -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - uint32_t config = croaring_detect_supported_architectures(); - if((config & CROARING_NEON) == CROARING_NEON) { - printf(" NEON detected\t"); - } - if((config & CROARING_ALTIVEC) == CROARING_ALTIVEC) { - printf("Altivec detected\n"); - } - - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#ifdef __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#ifdef __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#ifdef __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif -} - -#endif - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace misc { -#endif - -#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */ -/* end file include/roaring/misc/configreport.h */ -/* begin file src/roaring_array.c */ -#include <assert.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <inttypes.h> - - -#ifdef __cplusplus -extern "C" { namespace roaring { namespace internal { -#endif - -// Convention: [0,ra->size) all elements are initialized -// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - -extern inline int32_t ra_get_size(const roaring_array_t *ra); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - -extern inline container_t *ra_get_container_at_index( - const roaring_array_t *ra, uint16_t i, - uint8_t *typecode); - -extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); - -extern inline void ra_replace_key_and_container_at_index( - roaring_array_t *ra, int32_t i, uint16_t key, - container_t *c, uint8_t typecode); - -extern inline void ra_set_container_at_index( - const roaring_array_t *ra, int32_t i, - container_t *c, uint8_t typecode); - -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // - // Note: not implemented using C's realloc(), because the memory layout is - // Struct-of-Arrays vs. 
Array-of-Structs: - // https://github.com/RoaringBitmap/CRoaring/issues/256 - - if ( new_capacity == 0 ) { - ndpi_free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = new_capacity * ( - sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - void *bigalloc = ndpi_malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - container_t **newcontainers = (container_t **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if(ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); - } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - ndpi_free(oldbigalloc); - return true; -} - -static bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - ra_init(new_ra); - - if (cap > INT32_MAX) { return false; } - - if(cap > 0) { - void *bigalloc = ndpi_malloc(cap * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); - if( bigalloc == NULL ) return false; - new_ra->containers = (container_t **)bigalloc; - new_ra->keys = (uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - // Narrowing is safe because of above check - new_ra->allocation_size = (int32_t)cap; - } - return true; -} - -static int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; -} - -static void ra_init(roaring_array_t *new_ra) { - if (!new_ra) { return; } - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; - - new_ra->allocation_size = 0; - new_ra->size = 0; - new_ra->flags = 0; -} - -static bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size - dest->size = 0; // <--- This is important. - return true; // output was just cleared, so they match - } - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } - } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... 
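Editor's note: realloc_array and ra_init_with_capacity above keep the three parallel arrays (containers, keys, typecodes) inside one allocation, carved up with pointer arithmetic, so the roaring array grows and frees as a single block (see the linked CRoaring issue 256 on the struct-of-arrays layout). A toy version of the same layout:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    void     **containers;
    uint16_t  *keys;
    uint8_t   *typecodes;
    int32_t    capacity;
} toy_soa;

/* one malloc holds all three arrays back to back, widest members first
 * so the later arrays stay suitably aligned */
static int toy_soa_alloc(toy_soa *ra, int32_t cap) {
    size_t bytes = (size_t)cap * (sizeof(void *) + sizeof(uint16_t) + sizeof(uint8_t));
    void *big = malloc(bytes);
    if (!big) return 0;
    ra->containers = (void **)big;
    ra->keys       = (uint16_t *)(ra->containers + cap);
    ra->typecodes  = (uint8_t *)(ra->keys + cap);
    ra->capacity   = cap;
    return 1;
}

int main(void) {
    toy_soa ra;
    if (!toy_soa_alloc(&ra, 8)) return 1;
    ra.keys[0] = 42; ra.typecodes[0] = 2; ra.containers[0] = NULL;
    printf("key=%u typecode=%u\n", ra.keys[0], ra.typecodes[0]);
    free(ra.containers);   /* one free releases all three arrays */
    return 0;
}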
- if (copy_on_write) { - int32_t i; for (i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(container_t *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - int32_t i; for (i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - int32_t j; for (j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; -} - -static void ra_clear_containers(roaring_array_t *ra) { - int32_t i; for (i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); - } -} - -static void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); -} - -static void ra_clear_without_containers(roaring_array_t *ra) { - ndpi_free(ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; -} - -static void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); -} - -static bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - assert(desired_size <= MAX_CONTAINERS); - if (desired_size > ra->allocation_size) { - int32_t new_capacity = - (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; - if (new_capacity > MAX_CONTAINERS) { - new_capacity = MAX_CONTAINERS; - } - - return realloc_array(ra, new_capacity); - } - return true; -} - -static void ra_append( - roaring_array_t *ra, uint16_t key, - container_t *c, uint8_t typecode -){ - extend_array(ra, 1); - const int32_t pos = ra->size; - - ra->keys[pos] = key; - ra->containers[pos] = c; - ra->typecodes[pos] = typecode; - ra->size++; -} - -static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - // old contents is junk not needing freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; - } - ra->size++; -} - -static void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - int32_t i; for (i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, i, copy_on_write); - } -} - -static void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - int32_t i; for (i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], 
&sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); -} - -static void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); - - int32_t i; for (i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; - } -} - -static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - - int32_t i; for (i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -static container_t *ra_get_container( - roaring_array_t *ra, uint16_t x, uint8_t *typecode -){ - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -extern inline container_t *ra_get_container_at_index( - const roaring_array_t *ra, uint16_t i, - uint8_t *typecode); - -#ifdef ROARING_NOT_USED -static container_t *ra_get_writable_container( - roaring_array_t *ra, uint16_t x, - uint8_t *typecode -){ - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); -} - -static container_t *ra_get_writable_container_at_index( - roaring_array_t *ra, uint16_t i, - uint8_t *typecode -){ - assert(i < ra->size); - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); -} -#endif - -static uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; -} - -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - -extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); - -// everything skipped over is freed -static int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size && ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; - } - return pos; -} - -static void ra_insert_new_key_value_at( - roaring_array_t *ra, int32_t i, uint16_t key, - container_t *c, uint8_t typecode -){ - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(container_t *) 
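
The lookups above (ra_get_container(), ra_get_index()) all follow the same addressing scheme: the high 16 bits of a value select a container key, found by binary search over ra->keys, and the low 16 bits are what the container stores. A one-liner sketch of the split, with an illustrative helper name:

/* How a 32-bit value is routed to a container in the code above. */
#include <stdint.h>

static inline void toy_split(uint32_t v, uint16_t *key, uint16_t *low) {
    *key = (uint16_t)(v >> 16);     /* selects the container            */
    *low = (uint16_t)(v & 0xFFFF);  /* value stored inside the container */
}
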
* (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; - ra->size++; -} - -// note: Java routine set things to 0, enabling GC. -// Java called it "resize" but it was always used to downsize. -// Allowing upsize would break the conventions about -// valid containers below ra->size. - -static void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; -} - -static void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(container_t *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; -} - -static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); -} - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. In use it should be followed by a call to -// downsize. -// -static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); - - const int range = end - begin; - - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere - - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(container_t *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); -} - -static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); - } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), - sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(container_t *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} - - -static void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - int32_t i; for (i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; - } -} - -static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; - - size_t t_limit = 0; - - bool first = false; - size_t first_skip = 0; - - uint32_t *t_ans = NULL; - size_t cur_len = 0; - - int i = 0; for (i = 0; i < ra->size; ++i) { - - const container_t *c = container_unwrap_shared( - ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - t_limit = (const_CAST_bitset(c))->cardinality; - break; - case ARRAY_CONTAINER_TYPE: - t_limit = (const_CAST_array(c))->cardinality; - break; - case RUN_CONTAINER_TYPE: - t_limit = run_container_cardinality(const_CAST_run(c)); - break; - } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ - if (!first){ - //first_skip = t_limit - (ctr + t_limit - offset); - first_skip = 
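
ra_to_uint32_array() above is the internal dump path behind the public roaring_bitmap_to_uint32_array(). A usage sketch against the upstream CRoaring public header (<roaring/roaring.h>); in this embedded copy the routines are static and reached through nDPI's ndpi_bitmap wrappers instead.

/* Caller-managed dump of all values, in ascending order. */
#include <roaring/roaring.h>
#include <stdlib.h>

static uint32_t *dump_values(const roaring_bitmap_t *r, uint64_t *n_out) {
    uint64_t n = roaring_bitmap_get_cardinality(r);
    uint32_t *vals = (uint32_t *)malloc((size_t)n * sizeof(uint32_t));
    if (vals != NULL)
        roaring_bitmap_to_uint32_array(r, vals);
    *n_out = n;
    return vals;      /* caller frees */
}
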
offset - ctr; - first = true; - t_ans = (uint32_t *)ndpi_malloc(sizeof(*t_ans) * (first_skip + limit)); - if(t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; - cur_len = first_skip + limit; - } - if (dtr + t_limit > cur_len){ - uint32_t * append_ans = (uint32_t *)ndpi_malloc(sizeof(*append_ans) * (cur_len + t_limit)); - if(append_ans == NULL) { - if(t_ans != NULL) ndpi_free(t_ans); - return false; - } - memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - ndpi_free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - container_to_uint32_array( - t_ans + dtr, - const_CAST_bitset(c), ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE: - container_to_uint32_array( - t_ans + dtr, - const_CAST_array(c), ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE: - container_to_uint32_array( - t_ans + dtr, - const_CAST_run(c), ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; - } - ctr += t_limit; - if (dtr-first_skip >= limit) break; - } - if(t_ans != NULL) { - memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); - ndpi_free(t_ans); - } - return true; -} - -static bool ra_has_run_container(const roaring_array_t *ra) { - int32_t k; for (k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE) - return true; - } - return false; -} - -static uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; - } - return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; - } -} - -static size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); - - int32_t k; for (k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - return count; -} - -static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)ndpi_calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - int32_t i; for (i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } - } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - ndpi_free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; - } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; - - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += sizeof(ra->size); - - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - int32_t k; for (k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // 
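
ra_portable_size_in_bytes() and ra_portable_serialize() above implement the cross-implementation "portable" format (cookie, optional run-container bitmap, key/cardinality pairs, optional offsets, then the container payloads). A usage sketch with the upstream public API names; buffer ownership stays with the caller.

/* Serialize to the portable format; returns a malloc'd buffer. */
#include <roaring/roaring.h>
#include <stdlib.h>

static char *serialize_portable(const roaring_bitmap_t *r, size_t *len) {
    *len = roaring_bitmap_portable_size_in_bytes(r);
    char *buf = (char *)malloc(*len);
    if (buf != NULL)
        (void)roaring_bitmap_portable_serialize(r, buf); /* returns bytes written */
    return buf;
}
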
get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)( - container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - int32_t k; for (k = 0; k < ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - } - for (k = 0; k < ra->size; ++k) { - buf += container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; -} - -// Quickly checks whether there is a serialized bitmap at the pointer, -// not exceeding size "maxbytes" in bytes. This function does not allocate -// memory dynamically. -// -// This function returns 0 if and only if no valid bitmap is found. -// Otherwise, it returns how many bytes are occupied. -// -static size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t);// for cookie - if(bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if(bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - return 0; // logically impossible - } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if(bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; - } - bytestotal += size * 2 * sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if(bytestotal > maxbytes) return 0; - buf += size * 4; - } - // Reading the containers - int32_t k; for (k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * sizeof(rle16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } - } - return bytestotal; -} - - -// this function populates answer from the content of buf (reading up to maxbytes bytes). -// The function returns false if a properly serialized bitmap cannot be found. 
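
ra_portable_deserialize_size() above exists so that an untrusted buffer can be bounds-checked without allocating anything: it returns 0 when no valid bitmap fits in maxbytes, otherwise the number of bytes the bitmap occupies. A sketch of the intended call pattern, using the upstream public API names:

/* Validate before parsing; returns NULL on malformed input. */
#include <roaring/roaring.h>
#include <stddef.h>

static roaring_bitmap_t *load_untrusted(const char *buf, size_t maxbytes) {
    size_t used = roaring_bitmap_portable_deserialize_size(buf, maxbytes);
    if (used == 0 || used > maxbytes)
        return NULL;                       /* no valid bitmap within maxbytes */
    return roaring_bitmap_portable_deserialize_safe(buf, maxbytes);
}
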
-// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. -static bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { - *readbytes = sizeof(int32_t);// for cookie - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); - return false; - } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n", - cookie); - return false; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); - return false; - } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", - size); - return false; // logically impossible - } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); - return false; - } - bitmapOfRunContainers = buf; - buf += s; - } - uint16_t *keyscards = (uint16_t *)buf; - - *readbytes += size * 2 * sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); - return false; - } - buf += size * 2 * sizeof(uint16_t); - - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n"); - return false; - } - - int32_t k; for (k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); - answer->keys[k] = tmp; - } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Ran out of bytes while reading offsets.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - - // skipping the offsets - buf += size * 4; - } - // Reading the containers - for (k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Running out of bytes while reading a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - - run_container_t *c = run_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Running out of bytes while reading an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE; - } - } - return true; -} - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/roaring_array.c */ -/* begin file src/roaring_priority_queue.c */ - - -#ifdef __cplusplus -using namespace ::roaring::internal; - -extern "C" { namespace roaring { namespace api { -#endif - -struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; -}; - -typedef struct roaring_pq_element_s roaring_pq_element_t; - -struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; -}; - -typedef struct roaring_pq_s roaring_pq_t; - -static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; -} - -static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; - } - pq->elements[i] = *t; -} - -static void pq_free(roaring_pq_t *pq) { - ndpi_free(pq); -} - -static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; - } - } - if (!compare(&bestc, &ai)) { - break; - } - pq->elements[i] = bestc; - i = l; - } - pq->elements[i] = ai; -} - -static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; - roaring_pq_t *answer = (roaring_pq_t *)ndpi_malloc(alloc_size); - answer->elements = (roaring_pq_element_t *)(answer + 1); - answer->size = length; - uint32_t i; for (i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); - } - { - int32_t i; - for (i = (length >> 1); i >= 0; i--) { - percolate_down(answer, i); - } - } - return answer; -} - -static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; -} - -// this function consumes and frees the inputs -static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = 
ra_get_size(&x2->high_low_container); - if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; - } - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. - - ra_unshare_container_at_index(&x1->high_low_container, pos1); - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - assert(type1 != SHARED_CONTAINER_TYPE); - - ra_unshare_container_at_index(&x2->high_low_container, pos2); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - assert(type2 != SHARED_CONTAINER_TYPE); - - container_t *c; - - if ((type2 == BITSET_CONTAINER_TYPE) && - (type1 != BITSET_CONTAINER_TYPE) - ){ - c = container_lazy_ior(c2, type2, c1, type1, &result_type); - container_free(c1, type1); - if (c != c2) { - container_free(c2, type2); - } - } else { - c = container_lazy_ior(c1, type1, c2, type2, &result_type); - container_free(c2, type2); - if (c != c1) { - container_free(c1, type1); - } - } - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); - } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - ndpi_free(x1); - ndpi_free(x2); - return answer; -} - -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); - - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); - - pq_add(pq, &x1); - } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; - - pq_add(pq, &newelement); - } - } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; -} - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace api { -#endif -/* end file src/roaring_priority_queue.c */ -/* begin file src/roaring.c */ -#include <assert.h> -#include <stdarg.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> -#include <inttypes.h> - - - -#ifdef __cplusplus -using namespace ::roaring::internal; - -extern "C" { namespace roaring { namespace api { -#endif - -extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, - uint32_t val); -extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r); -extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow); - -static inline bool is_cow(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_COW; -} -static inline bool is_frozen(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_FROZEN; -} - -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -static inline container_t *containerptr_roaring_bitmap_add( - roaring_bitmap_t *r, uint32_t val, - uint8_t *type, int *index -){ - roaring_array_t *ra = &r->high_low_container; - - uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - if (i >= 0) { - ra_unshare_container_at_index(ra, i); - container_t *c = ra_get_container_at_index(ra, i, type); - uint8_t new_type = *type; - container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); - *index = i; - if (c2 != c) { - container_free(c, *type); - ra_set_container_at_index(ra, i, c2, new_type); - *type = new_type; - return c2; - } else { - return c; - } - } else { - array_container_t *new_ac = array_container_create(); - container_t 
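
roaring_bitmap_or_many_heap() above keeps the inputs in a priority queue ordered by portable serialized size and always merges the two smallest first, which the comment notes can beat the left-to-right roaring_bitmap_or_many(). A usage sketch with upstream public API names; either way the caller frees the result.

/* Union of n bitmaps using the heap strategy defined above. */
#include <roaring/roaring.h>

static roaring_bitmap_t *union_all(const roaring_bitmap_t **maps, uint32_t n) {
    /* alternative: roaring_bitmap_or_many((size_t)n, maps); */
    return roaring_bitmap_or_many_heap(n, maps);
}
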
*c = container_add(new_ac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE, type); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); - *index = -i - 1; - return c; - } -} - -static roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); - if (!is_ok) { - ndpi_free(ans); - return NULL; - } - return ans; -} - -static bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { - return ra_init_with_capacity(&r->high_low_container, cap); -} - - -static void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - container_t *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; - uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); - } - prev = val; - } -} - -static roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; -} - -static roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - va_list ap; - va_start(ap, n_args); - size_t i; for (i = 1; i <= n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); - } - va_end(ap); - return answer; -} - -static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { - return (a < b) ? a : b; -} - -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? 
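
roaring_bitmap_add_many() above caches the last container touched: as long as the next value shares the same high 16 bits it skips the key lookup entirely, so (roughly) sorted input inserts in a single pass. A usage sketch with upstream public API names:

/* Bulk insert; fastest when vals is sorted or clustered by key. */
#include <roaring/roaring.h>

static roaring_bitmap_t *from_values(const uint32_t *vals, size_t n) {
    roaring_bitmap_t *r = roaring_bitmap_create();
    if (r != NULL)
        roaring_bitmap_add_many(r, n, vals);
    return r;
}
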
a : b; -} - -static roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if(max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - uint32_t value; for (value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; - } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - container_t *container = container_from_range(&type, container_min, - container_max, (uint16_t)step); - ra_append(&answer->high_low_container, key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; -} - -static void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - roaring_array_t *ra = &r->high_low_container; - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = count_greater(ra->keys, ra->size, max_key); - int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length, - min_key); - int32_t common_length = ra->size - prefix_length - suffix_length; - - if (num_required_containers > common_length) { - ra_shift_tail(ra, suffix_length, - num_required_containers - common_length); - } - - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->size - suffix_length - 1; - uint32_t key; for ( key = max_key; key != min_key-1; key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; - container_t* new_container; - uint8_t new_type; - - if (src >= 0 && ra->keys[src] == key) { - ra_unshare_container_at_index(ra, src); - new_container = container_add_range(ra->containers[src], - ra->typecodes[src], - container_min, container_max, - &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], - ra->typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max+1, 1); - } - ra_replace_key_and_container_at_index(ra, dst, key, new_container, - new_type); - dst--; - } -} - -static void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - roaring_array_t *ra = &r->high_low_container; - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t src = count_less(ra->keys, ra->size, min_key); - int32_t dst = src; - while (src < ra->size && ra->keys[src] <= max_key) { - uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == ra->keys[src]) ? 
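
roaring_bitmap_from_range() above builds a strided half-open range directly out of containers, while roaring_bitmap_add_range()/roaring_bitmap_remove_range() edit a half-open interval in place (they map onto the *_closed variants shown here). A usage sketch with upstream public API names:

/* Range construction and in-place range edits. */
#include <roaring/roaring.h>

static roaring_bitmap_t *stride_example(void) {
    /* {0, 2000, 4000, ..., 998000} */
    roaring_bitmap_t *r = roaring_bitmap_from_range(0, 1000000, 2000);
    if (r != NULL) {
        roaring_bitmap_add_range(r, 10, 20);   /* adds 10..19   */
        roaring_bitmap_remove_range(r, 0, 1);  /* drops value 0 */
    }
    return r;
}
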
(max & 0xffff) : 0xffff; - ra_unshare_container_at_index(ra, src); - container_t *new_container; - uint8_t new_type; - new_container = container_remove_range(ra->containers[src], - ra->typecodes[src], - container_min, container_max, - &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], - ra->typecodes[src]); - } - if (new_container) { - ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], - new_container, new_type); - dst++; - } - src++; - } - if (src > dst) { - ra_shift_tail(ra, ra->size - src, dst - src); - } -} - -extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); -extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); - -static void roaring_bitmap_printf(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; - - printf("{"); - int i = 0; for (i = 0; i < ra->size; ++i) { - container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - - if (i + 1 < ra->size) { - printf(","); - } - } - printf("}"); -} - -static void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; - - printf("{"); - int i = 0; for (i = 0; i < ra->size; ++i) { - printf("%d: %s (%d)", ra->keys[i], - get_full_container_name(ra->containers[i], ra->typecodes[i]), - container_get_cardinality(ra->containers[i], ra->typecodes[i])); - if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { - printf( - "(shared count = %" PRIu32 " )", - CAST_shared(ra->containers[i])->counter); - } - - if (i + 1 < ra->size) { - printf(", "); - } - } - printf("}"); -} - -typedef struct min_max_sum_s { - uint32_t min; - uint32_t max; - uint64_t sum; -} min_max_sum_t; - -static bool min_max_sum_fnc(uint32_t value, void *param) { - min_max_sum_t *mms = (min_max_sum_t *)param; - if (value > mms->max) mms->max = value; - if (value < mms->min) mms->min = value; - mms->sum += value; - return true; // we always process all data points -} - -/** -* (For advanced users.) 
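
min_max_sum_fnc() above is a roaring_iterate() callback: it is called once per value in increasing order and returns true to keep the iteration going. A minimal sketch of the same visitor pattern, with upstream public API names and an illustrative callback:

/* Sum all values with a roaring_iterate() callback. */
#include <roaring/roaring.h>
#include <stdbool.h>
#include <stdint.h>

static bool sum_cb(uint32_t value, void *param) {
    *(uint64_t *)param += value;
    return true;                 /* false would stop the iteration early */
}

static uint64_t sum_values(const roaring_bitmap_t *r) {
    uint64_t sum = 0;
    roaring_iterate(r, sum_cb, &sum);
    return sum;
}
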
-* Collect statistics about the bitmap -*/ -static void roaring_bitmap_statistics(const roaring_bitmap_t *r, - roaring_statistics_t *stat) { - const roaring_array_t *ra = &r->high_low_container; - - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->size; - stat->cardinality = roaring_bitmap_get_cardinality(r); - min_max_sum_t mms; - mms.min = UINT32_C(0xFFFFFFFF); - mms.max = UINT32_C(0); - mms.sum = 0; - roaring_iterate(r, &min_max_sum_fnc, &mms); - stat->min_value = mms.min; - stat->max_value = mms.max; - stat->sum_value = mms.sum; - - int i = 0; for (i = 0; i < ra->size; ++i) { - uint8_t truetype = - get_container_type(ra->containers[i], ra->typecodes[i]); - uint32_t card = - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->containers[i], ra->typecodes[i]); - switch (truetype) { - case BITSET_CONTAINER_TYPE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - __builtin_unreachable(); - } - } -} - -static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - if (!ra_init_with_capacity( // allocation of list of containers can fail - &ans->high_low_container, r->high_low_container.size) - ){ - ndpi_free(ans); - return NULL; - } - if (!ra_overwrite( // memory allocation of individual containers may fail - &r->high_low_container, &ans->high_low_container, is_cow(r)) - ){ - roaring_bitmap_free(ans); // overwrite should leave in freeable state - return NULL; - } - roaring_bitmap_set_copy_on_write(ans, is_cow(r)); - return ans; -} - -static bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src) { - roaring_bitmap_set_copy_on_write(dest, is_cow(src)); - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - is_cow(src)); -} - -static void roaring_bitmap_free(const roaring_bitmap_t *r) { - if (!is_frozen(r)) { - ra_clear((roaring_array_t*)&r->high_low_container); - } - ndpi_free((roaring_bitmap_t*)r); -} - -static void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); -} - -static void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - roaring_array_t *ra = &r->high_low_container; - - const uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(ra, i); - container_t *container = - ra_get_container_at_index(ra, i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - } else { - array_container_t *newac = array_container_create(); - container_t *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - } -} - -static 
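
roaring_bitmap_statistics() above fills a roaring_statistics_t with per-container-type counts, byte sizes, and min/max/sum of the values, which is useful for judging whether run-length optimization would pay off. A usage sketch with upstream public API names:

/* Print a short summary of a bitmap's internal layout. */
#include <roaring/roaring.h>
#include <stdio.h>

static void describe(const roaring_bitmap_t *r) {
    roaring_statistics_t st;
    roaring_bitmap_statistics(r, &st);
    printf("%u containers (%u array, %u bitset, %u run), cardinality %llu\n",
           st.n_containers, st.n_array_containers,
           st.n_bitset_containers, st.n_run_containers,
           (unsigned long long)st.cardinality);
}
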
bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - container_t *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); - - result = oldCardinality != newCardinality; - } - } else { - array_container_t *newac = array_container_create(); - container_t *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; - } - - return result; -} - -static void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - container_t *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_get_cardinality(container2, newtypecode) != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - } -} - -static bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - container_t *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - - const int newCardinality = - container_get_cardinality(container2, newtypecode); - - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - - result = oldCardinality != newCardinality; - } - return result; -} - -static void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; - } - int32_t pos = -1; // position of the container used in the previous iteration - size_t i; for (i = 0; i < n_args; i++) { - uint16_t key = 
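
The *_checked variants above report whether the bitmap actually changed (by comparing container cardinality before and after), which saves a separate contains() probe around an insert or delete. A usage sketch with upstream public API names:

/* Insert/erase while learning whether the value was new/present. */
#include <roaring/roaring.h>
#include <stdbool.h>

static bool insert_if_new(roaring_bitmap_t *r, uint32_t v) {
    return roaring_bitmap_add_checked(r, v);    /* true iff v was absent  */
}

static bool erase_if_present(roaring_bitmap_t *r, uint32_t v) {
    return roaring_bitmap_remove_checked(r, v); /* true iff v was present */
}
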
(uint16_t)(vals[i] >> 16); - if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); - } - if (pos >= 0) { - uint8_t new_typecode; - container_t *new_container; - new_container = container_remove(r->high_low_container.containers[pos], - vals[i] & 0xffff, - r->high_low_container.typecodes[pos], - &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); - } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; - } - } - } -} - -// there should be some SIMD optimizations possible here -static roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_and(c1, type1, c2, type2, &result_type); - - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); // otherwise: memory leak! - } - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -/** - * Compute the union of 'number' bitmaps. - */ -static roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - size_t i; for (i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -/** - * Compute the xor of 'number' bitmaps. - */ -static roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - size_t i; for (i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -// inplace and (modifies its first argument). 
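
roaring_bitmap_and(), roaring_bitmap_or_many() and roaring_bitmap_xor_many() above are the out-of-place set operations: each returns a freshly allocated bitmap the caller must free. A small usage sketch with upstream public API names:

/* Size of the intersection of two bitmaps. */
#include <roaring/roaring.h>

static uint64_t overlap(const roaring_bitmap_t *a, const roaring_bitmap_t *b) {
    roaring_bitmap_t *both = roaring_bitmap_and(a, b);
    uint64_t n = roaring_bitmap_get_cardinality(both);
    roaring_bitmap_free(both);
    return n;
}
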
-static void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); - - // any skipped-over or newly emptied containers in x1 - // have to be freed. - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t type1, type2, result_type; - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared container. - // Rationale: using a shared container safely with in place computation would - // require making a copy and then doing the computation in place which is likely - // less efficient than avoiding in place entirely and always generating a new - // container. - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? container_and(c1, type1, c2, type2, &result_type) - : container_iand(c1, type1, c2, type2, &result_type); - - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, type1); - } - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - result_type); - intersection_size++; - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - - // if we ended early because x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; - } - - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); -} - -static roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_or(c1, type1, c2, type2, &result_type); - - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = 
ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - // c1 = container_clone(c1, type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - // c2 = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -// inplace or (modifies its first argument). -static void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - if (!container_is_full(c1, type1)) { - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? 
container_or(c1, type1, c2, type2, &result_type) - : container_ior(c1, type1, c2, type2, &result_type); - - if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - - // container_t *c2_clone = container_clone(c2, type2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} - -static roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_xor(c1, type1, c2, type2, &result_type); - - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - 
ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -// inplace xor (modifies its first argument). - -static void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. - - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared container. - // Rationale: using a shared container safely with in place computation would - // require making a copy and then doing the computation in place which is likely - // less efficient than avoiding in place entirely and always generating a new - // container. - - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // so release - } - else { - c = container_ixor(c1, type1, c2, type2, &result_type); - } - - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} - -static roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2)); - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - 
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_andnot(c1, type1, c2, type2, - &result_type); - - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - is_cow(x1)); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - } - } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -// inplace andnot (modifies its first argument). - -static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_clear(x1); - return; - } - - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared container. - // Rationale: using a shared container safely with in place computation would - // require making a copy and then doing the computation in place which is likely - // less efficient than avoiding in place entirely and always generating a new - // container. 
- - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_andnot(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } - else { - c = container_iandnot(c1, type1, c2, type2, &result_type); - } - - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, result_type); - } else { - container_free(c, result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - if (pos1 != intersection_size) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c1, - type1); - } - intersection_size++; - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - - if (pos1 < length1) { - // all containers between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. - if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); - } - ra_downsize(&x1->high_low_container, intersection_size); -} - -static uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; - - uint64_t card = 0; - int i = 0; for (i = 0; i < ra->size; ++i) - card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); - return card; -} - -static uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, - uint64_t range_start, - uint64_t range_end) { - const roaring_array_t *ra = &r->high_low_container; - - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); - } - if (range_start >= range_end) { - return 0; - } - range_end--; // make range_end inclusive - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - uint16_t minhb = range_start >> 16; - uint16_t maxhb = range_end >> 16; - - uint64_t card = 0; - - int i = ra_get_index(ra, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); - } else { - card += container_get_cardinality(ra->containers[i], - ra->typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->containers[i], ra->typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } - - for (; i < ra->size; i++) { - uint16_t key = ra->keys[i]; - if (key < maxhb) { - card += container_get_cardinality(ra->containers[i], - ra->typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); - break; - } else { - break; - } - } - - return card; -} - - -static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { - return r->high_low_container.size == 0; -} 
- -static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { - ra_to_uint32_array(&r->high_low_container, ans); -} - -static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, - size_t offset, size_t limit, - uint32_t *ans) { - return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); -} - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. -*/ -static bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - int i = 0; for (i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - ra_unshare_container_at_index( - &r->high_low_container, i); // TODO: this introduces extra cloning! - container_t *c = ra_get_container_at_index(&r->high_low_container, i, - &type_original); - container_t *c1 = convert_run_optimize(c, type_original, &type_after); - if (type_after == RUN_CONTAINER_TYPE) { - answer = true; - } - ra_set_container_at_index(&r->high_low_container, i, c1, type_after); - } - return answer; -} - -static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - int i = 0; for (i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original; - container_t *c = ra_get_container_at_index(&r->high_low_container, i, - &type_original); - answer += container_shrink_to_fit(c, type_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; -} - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - int i = 0; for (i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - container_t *c = ra_get_container_at_index(&r->high_low_container, i, - &type_original); - if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { - answer = true; - if (type_original == SHARED_CONTAINER_TYPE) { - run_container_t *truec = CAST_run(CAST_shared(c)->container); - int32_t card = run_container_cardinality(truec); - container_t *c1 = convert_to_bitset_or_array_container( - truec, card, &type_after); - shared_container_free(CAST_shared(c)); // frees run as needed - ra_set_container_at_index(&r->high_low_container, i, c1, - type_after); - - } else { - int32_t card = run_container_cardinality(CAST_run(c)); - container_t *c1 = convert_to_bitset_or_array_container( - CAST_run(c), card, &type_after); - run_container_free(CAST_run(c)); - ra_set_container_at_index(&r->high_low_container, i, c1, - type_after); - } - } - } - return answer; -} - -static size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); - uint64_t cardinality = roaring_bitmap_get_cardinality(r); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(r, buf + 1) + 1; - } else { - buf[0] = SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; - } -} - -static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { - size_t portablesize = 
roaring_bitmap_portable_size_in_bytes(r); - uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + - sizeof(uint32_t); - return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; -} - -static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { - return ra_portable_size_in_bytes(&r->high_low_container); -} - - -static roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); - if(is_ok) assert(bytesread <= maxbytes); - roaring_bitmap_set_copy_on_write(ans, false); - if (!is_ok) { - ndpi_free(ans); - return NULL; - } - return ans; -} - -static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); -} - - -static size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); -} - - -static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, - char *buf) { - return ra_portable_serialize(&r->high_low_container, buf); -} - -static roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { - const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); - } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); -} - -static bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, - void *ptr) { - const roaring_array_t *ra = &r->high_low_container; - - int i = 0; for (i = 0; i < ra->size; ++i) - if (!container_iterate(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, - iterator, ptr)) { - return false; - } - return true; -} - -static bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - const roaring_array_t *ra = &r->high_low_container; - - int i = 0; for (i = 0; i < ra->size; ++i) - if (!container_iterate64( - ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; - } - return true; -} - -/**** -* begin roaring_uint32_iterator_t -*****/ - -// Partially initializes the roaring iterator when it begins looking at -// a new container. -static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - if (newit->container_index >= newit->parent->high_low_container.size || - newit->container_index < 0) { - newit->current_value = UINT32_MAX; - return (newit->has_value = false); - } - // assume not empty - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. 
- newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - return newit->has_value; -} - -static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = const_CAST_bitset(newit->container); - - uint32_t wordindex = 0; - uint64_t word; - while ((word = bc->words[wordindex]) == 0) { - wordindex++; // advance - } - // here "word" is non-zero - newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); - newit->current_value = newit->highbits | newit->in_container_index; - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(newit->container); - newit->current_value = newit->highbits | ac->array[0]; - break; } - - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(newit->container); - newit->current_value = newit->highbits | rc->runs[0].value; - break; } - - default: - // if this ever happens, bug! - assert(false); - } // switch (typecode) - return true; -} - -static bool loadlastvalue(roaring_uint32_iterator_t* newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - switch(newit->typecode) { - case BITSET_CONTAINER_TYPE: { - uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; - uint64_t word; - const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; - while ((word = bitset_container->words[wordindex]) == 0) - --wordindex; - - int num_leading_zeros = __builtin_clzll(word); - newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - newit->current_value = newit->highbits | newit->in_container_index; - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t* array_container = (const array_container_t*)newit->container; - newit->in_container_index = array_container->cardinality - 1; - newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t* run_container = (const run_container_t*)newit->container; - newit->run_index = run_container->n_runs - 1; - const rle16_t* last_run = &run_container->runs[newit->run_index]; - newit->current_value = newit->highbits | (last_run->value + last_run->length); - break; - } - default: - // if this ever happens, bug! 
- assert(false); - } - return true; -} - -// prerequesite: the value should be in range of the container -static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { - // Don't have to check return value because of prerequisite - iter_new_container_partial_init(newit); - uint16_t lb = val & 0xFFFF; - - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = const_CAST_bitset(newit->container); - newit->in_container_index = - bitset_container_index_equalorlarger(bc, lb); - newit->current_value = newit->highbits | newit->in_container_index; - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(newit->container); - newit->in_container_index = - array_container_index_equalorlarger(ac, lb); - newit->current_value = - newit->highbits | ac->array[newit->in_container_index]; - break; } - - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(newit->container); - newit->run_index = run_container_index_equalorlarger(rc, lb); - if (rc->runs[newit->run_index].value <= lb) { - newit->current_value = val; - } else { - newit->current_value = - newit->highbits | rc->runs[newit->run_index].value; - } - break; } - - default: - __builtin_unreachable(); - } - - return true; -} - -static void roaring_init_iterator(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); -} - -static void roaring_init_iterator_last(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = newit->parent->high_low_container.size - 1; - newit->has_value = loadlastvalue(newit); -} - -static roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)ndpi_malloc(sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_init_iterator(r, newit); - return newit; -} - -static roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)ndpi_malloc(sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; -} - -static bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(& it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if(lowvalue < lb ) { - it->container_index = i+1; // will have to load first value of next container - } else {// the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; - } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i-1; - } - it->has_value = loadfirstvalue(it); - return it->has_value; -} - - -static bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - if (it->container_index < 0) { - it->container_index = 0; - return (it->has_value = loadfirstvalue(it)); - } - - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: { - const 
bitset_container_t *bc = const_CAST_bitset(it->container); - it->in_container_index++; - - uint32_t wordindex = it->in_container_index / 64; - if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - - uint64_t word = bc->words[wordindex] & - (UINT64_MAX << (it->in_container_index % 64)); - // next part could be optimized/simplified - while ((word == 0) && - (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { - wordindex++; - word = bc->words[wordindex]; - } - if (word != 0) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(it->container); - it->in_container_index++; - if (it->in_container_index < ac->cardinality) { - it->current_value = - it->highbits | ac->array[it->in_container_index]; - return (it->has_value = true); - } - break; } - - case RUN_CONTAINER_TYPE: { - if(it->current_value == UINT32_MAX) { // avoid overflow to zero - return (it->has_value = false); - } - - const run_container_t* rc = const_CAST_run(it->container); - uint32_t limit = (it->highbits | (rc->runs[it->run_index].value + - rc->runs[it->run_index].length)); - if (++it->current_value <= limit) { - return (it->has_value = true); - } - - if (++it->run_index < rc->n_runs) { // Assume the run has a value - it->current_value = - it->highbits | rc->runs[it->run_index].value; - return (it->has_value = true); - } - break; - } - - default: - __builtin_unreachable(); - } - - // moving to next container - it->container_index++; - return (it->has_value = loadfirstvalue(it)); -} - -static bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index < 0) { - return (it->has_value = false); - } - if (it->container_index >= it->parent->high_low_container.size) { - it->container_index = it->parent->high_low_container.size - 1; - return (it->has_value = loadlastvalue(it)); - } - - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: { - if (--it->in_container_index < 0) - break; - - const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; - int32_t wordindex = it->in_container_index / 64; - uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); - - while (word == 0 && --wordindex >= 0) { - word = bitset_container->words[wordindex]; - } - if (word == 0) - break; - - int num_leading_zeros = __builtin_clzll(word); - it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - case ARRAY_CONTAINER_TYPE: { - if (--it->in_container_index < 0) - break; - - const array_container_t* array_container = (const array_container_t*)it->container; - it->current_value = it->highbits | array_container->array[it->in_container_index]; - return (it->has_value = true); - } - case RUN_CONTAINER_TYPE: { - if(it->current_value == 0) - return (it->has_value = false); - - const run_container_t* run_container = (const run_container_t*)it->container; - if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { - return (it->has_value = true); - } - - if (--it->run_index < 0) - break; - - it->current_value = it->highbits | (run_container->runs[it->run_index].value + - run_container->runs[it->run_index].length); - return (it->has_value = true); - } - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - - // moving to previous container - it->container_index--; - return (it->has_value = loadlastvalue(it)); -} - -static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { - uint32_t ret = 0; - uint32_t num_values; - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - const array_container_t* acont; //TODO remove - const run_container_t* rcont; //TODO remove - const bitset_container_t* bcont; //TODO remove - - while (it->has_value && ret < count) { - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: - bcont = const_CAST_bitset(it->container); - wordindex = it->in_container_index / 64; - word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); - do { - while (word != 0 && ret < count) { - buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); - word = word & (word - 1); - buf++; - ret++; - } - while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { - wordindex++; - word = bcont->words[wordindex]; - } - } while (word != 0 && ret < count); - it->has_value = (word != 0); - if (it->has_value) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - } - break; - case ARRAY_CONTAINER_TYPE: - acont = const_CAST_array(it->container); - num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); - uint32_t i; for (i = 0; i < num_values; i++) { - buf[i] = it->highbits | acont->array[it->in_container_index + i]; - } - buf += num_values; - ret += num_values; - it->in_container_index += num_values; - it->has_value = (it->in_container_index < acont->cardinality); - if (it->has_value) { - it->current_value = it->highbits | acont->array[it->in_container_index]; - } - break; - case RUN_CONTAINER_TYPE: - rcont = const_CAST_run(it->container); - //"in_run_index" name is misleading, read it as "max_value_in_current_run" - do { - uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); - num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); - uint32_t i; for (i = 0; i < num_values; i++) { - buf[i] = it->current_value + i; - } - it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 - buf += num_values; - ret += num_values; - - if (it->current_value > largest_run_value || it->current_value == 0) { - it->run_index++; - if (it->run_index < rcont->n_runs) { - it->current_value = it->highbits | rcont->runs[it->run_index].value; - } else { - it->has_value = false; - } - } - } while ((ret < count) && it->has_value); - break; - default: - assert(false); - } - if (it->has_value) { - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); - } - return ret; -} - - - -static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { ndpi_free(it); } - -/**** -* end of roaring_uint32_iterator_t -*****/ - -static bool roaring_bitmap_equals(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; - - if (ra1->size != ra2->size) { - return false; - } - int i = 0; for (i = 0; i < ra1->size; ++i) { - if (ra1->keys[i] != ra2->keys[i]) { - return false; - } - } - for (i = 0; i < ra1->size; ++i) { - bool areequal = container_equals(ra1->containers[i], - ra1->typecodes[i], - 
ra2->containers[i], - ra2->typecodes[i]); - if (!areequal) { - return false; - } - } - return true; -} - -static bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; - - const int length1 = ra1->size, - length2 = ra2->size; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(ra1, pos1); - const uint16_t s2 = ra_get_key_at_index(ra2, pos2); - - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1); - container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2); - if (!container_is_subset(c1, type1, c2, type2)) - return false; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - return false; - } else { // s1 > s2 - pos2 = ra_advance_until(ra2, s1, pos2); - } - } - if (pos1 == length1) - return true; - else - return false; -} - -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - 
container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return roaring_bitmap_copy(x1); - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - roaring_bitmap_t *ans = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; - - ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, is_cow(x1)); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. - } - - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - - uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb); - } - - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, is_cow(x1)); - return ans; -} - -static void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); - - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } - - if (lb_end != 0xFFFF) --hb_end; - - uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } -} - -static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c; - if (bitsetconversion && - (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && - (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE) - ){ - container_t *newc1 = - container_mutable_unwrap_shared(c1, &type1); - newc1 = container_to_bitset(newc1, type1); - type1 = BITSET_CONTAINER_TYPE; - c = container_lazy_ior(newc1, type1, c2, type2, - &result_type); - if (c != newc1) { // should not happen - container_free(newc1, type1); - } - } else { - c = container_lazy_or(c1, type1, c2, type2, &result_type); - } - // since we assume that the initial containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int 
length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - if (!container_is_full(c1, type1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE) - ){ - c1 = get_writable_copy_if_shared(c1, &type1); - } else { - // convert to bitset - container_t *old_c1 = c1; - uint8_t old_type1 = type1; - c1 = container_mutable_unwrap_shared(c1, &type1); - c1 = container_to_bitset(c1, type1); - container_free(old_c1, old_type1); - type1 = BITSET_CONTAINER_TYPE; - } - - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_lazy_ior(c1, type1, c2, type2, - &result_type); - - if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} - -static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - container_t *c = container_lazy_xor( - c1, type1, c2, type2, &result_type); - - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = 
ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared container. - // Rationale: using a shared container safely with in place computation would - // require making a copy and then doing the computation in place which is likely - // less efficient than avoiding in place entirely and always generating a new - // container. 
- - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_lazy_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } - else { - c = container_lazy_ixor(c1, type1, c2, type2, &result_type); - } - - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} - -static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { - roaring_array_t *ra = &r->high_low_container; - - int i = 0; for (i = 0; i < ra->size; ++i) { - const uint8_t old_type = ra->typecodes[i]; - container_t *old_c = ra->containers[i]; - uint8_t new_type = old_type; - container_t *new_c = container_repair_after_lazy(old_c, &new_type); - ra->containers[i] = new_c; - ra->typecodes[i] = new_type; - } -} - - - -/** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. -*/ -static uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - int i = 0; for (i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } - } - return size; -} - -/** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. -*/ -static uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *c = bm->high_low_container.containers[0]; - uint8_t type = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(c, type); - return lowvalue | (key << 16); - } - return UINT32_MAX; -} - -/** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. 
-*/ -static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); - } - return 0; -} - -static bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, - uint32_t *element) { - container_t *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; - } - - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed - return true; - } else - return false; -} - -static bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - if (container_intersect(c1, type1, c2, type2)) - return true; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); - } - } - return answer != 0; -} - -static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, - uint64_t x, uint64_t y) { - if (x >= y) { - // Empty range. - return false; - } - roaring_uint32_iterator_t it; - roaring_init_iterator(bm, &it); - if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) { - // No values above x. - return false; - } - if (it.current_value >= y) { - // No values below y. 
- return false; - } - return true; -} - - -static uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, pos1, &type1); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, pos2, &type2); - answer += container_and_cardinality(c1, type1, c2, type2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -static double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); -} - -static uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; -} - -static uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; -} - -static uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - 2 * inter; -} - - -static bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. - */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; - - uint8_t typecode; - // next call ought to be cheap - container_t *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary search - return container_contains(container, val & 0xFFFF, typecode); -} - - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) is present - */ -static bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - if (range_start >= range_end) return true; // empty range are always contained! 
- if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - ie = (ie < 0 ? -ie - 1 : ie); - if ((is < 0) || ((ie - is) != span)) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; - uint8_t type; - container_t *c = ra_get_container_at_index(&r->high_low_container, is, - &type); - if (hb_rs == hb_re) { - return container_contains_range(c, lb_rs, lb_re, type); - } - if (!container_contains_range(c, lb_rs, 1 << 16, type)) { - return false; - } - assert(ie < hlc_sz); // would indicate an algorithmic bug - c = ra_get_container_at_index(&r->high_low_container, ie, &type); - if (!container_contains_range(c, 0, lb_re, type)) { - return false; - } - int32_t i; for (i = is + 1; i < ie; ++i) { - c = ra_get_container_at_index(&r->high_low_container, i, &type); - if (!container_is_full(c, type) ) { - return false; - } - } - return true; -} - - -static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - return (roaring_bitmap_get_cardinality(r2) > - roaring_bitmap_get_cardinality(r1) && - roaring_bitmap_is_subset(r1, r2)); -} - - -/* - * FROZEN SERIALIZATION FORMAT DESCRIPTION - * - * -- (beginning must be aligned by 32 bytes) -- - * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers] - * <run_data> rle16_t[total number of rle elements in all run containers] - * <array_data> uint16_t[total number of array elements in all array containers] - * <keys> uint16_t[num_containers] - * <counts> uint16_t[num_containers] - * <typecodes> uint8_t[num_containers] - * <header> uint32_t - * - * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) - * and the number of containers (17 bits). - * - * <counts> stores number of elements for every container. - * Its meaning depends on container type. - * For array and bitset containers, this value is the container cardinality minus one. - * For run container, it is the number of rle_t elements (n_runs). - * - * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of - * all containers of respective type. - * - * <*_data> and <keys> are kept close together because they are not accessed - * during deserilization. This may reduce IO in case of large mmaped bitmaps. - * All members have their native alignments during deserilization except <header>, - * which is not guaranteed to be aligned by 4 bytes. 
- */ - -static size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { - const roaring_array_t *ra = &rb->high_low_container; - size_t num_bytes = 0; - int32_t i; for (i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - num_bytes += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - num_bytes += ac->cardinality * sizeof(uint16_t); - break; - } - default: - __builtin_unreachable(); - } - } - num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes - num_bytes += 4; // header - return num_bytes; -} - -static inline void *arena_alloc(char **arena, size_t num_bytes) { - char *res = *arena; - *arena += num_bytes; - return res; -} - -static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { - /* - * Note: we do not require user to supply a specifically aligned buffer. - * Thus we have to use memcpy() everywhere. - */ - - const roaring_array_t *ra = &rb->high_low_container; - - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - int32_t i; for (i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - run_zone_size += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - array_zone_size += ac->cardinality * sizeof(uint16_t); - break; - } - default: - __builtin_unreachable(); - } - } - - uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); - rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); - uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); - uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); - uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); - uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); - uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); - - for (i = 0; i < ra->size; i++) { - uint16_t count; - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = - const_CAST_bitset(ra->containers[i]); - memcpy(bitset_zone, bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { - count = bc->cardinality - 1; - } else { - count = bitset_container_compute_cardinality(bc) - 1; - } - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - size_t num_bytes = rc->n_runs * sizeof(rle16_t); - memcpy(run_zone, rc->runs, num_bytes); - run_zone += rc->n_runs; - count = rc->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - size_t num_bytes = ac->cardinality * sizeof(uint16_t); - memcpy(array_zone, ac->array, num_bytes); - array_zone += ac->cardinality; - count = ac->cardinality - 1; - break; - } - default: - __builtin_unreachable(); - } - memcpy(&count_zone[i], &count, 2); - } - memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); - 
memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); - uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; - memcpy(header_zone, &header, 4); -} - -static const roaring_bitmap_t * -roaring_bitmap_frozen_view(const char *buf, size_t length) { - if ((uintptr_t)buf % 32 != 0) { - return NULL; - } - - // cookie and num_containers - if (length < 4) { - return NULL; - } - uint32_t header; - memcpy(&header, buf + length - 4, 4); // header may be misaligned - if ((header & 0x7FFF) != FROZEN_COOKIE) { - return NULL; - } - int32_t num_containers = (header >> 15); - - // typecodes, counts and keys - if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { - return NULL; - } - uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); - uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); - uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); - - // {bitset,array,run}_zone - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t num_array_containers = 0; - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - int32_t i; for (i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: - num_bitset_containers++; - bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - case RUN_CONTAINER_TYPE: - num_run_containers++; - run_zone_size += counts[i] * sizeof(rle16_t); - break; - case ARRAY_CONTAINER_TYPE: - num_array_containers++; - array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); - break; - default: - return NULL; - } - } - if (length != bitset_zone_size + run_zone_size + array_zone_size + - 5 * num_containers + 4) { - return NULL; - } - uint64_t *bitset_zone = (uint64_t*) (buf); - rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); - uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); - - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(container_t*); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += num_array_containers * sizeof(array_container_t); - - char *arena = (char *)ndpi_malloc(alloc_size); - if (arena == NULL) { - return NULL; - } - - roaring_bitmap_t *rb = (roaring_bitmap_t *) - arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.keys = (uint16_t *)keys; - rb->high_low_container.typecodes = (uint8_t *)typecodes; - rb->high_low_container.containers = - (container_t **)arena_alloc(&arena, - sizeof(container_t*) * num_containers); - for (i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *) - arena_alloc(&arena, sizeof(bitset_container_t)); - bitset->words = bitset_zone; - bitset->cardinality = counts[i] + UINT32_C(1); - rb->high_low_container.containers[i] = bitset; - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *) - arena_alloc(&arena, sizeof(run_container_t)); - run->capacity = counts[i]; - run->n_runs = counts[i]; - run->runs = run_zone; - rb->high_low_container.containers[i] = run; - run_zone += run->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE: { - 
array_container_t *array = (array_container_t *) - arena_alloc(&arena, sizeof(array_container_t)); - array->capacity = counts[i] + UINT32_C(1); - array->cardinality = counts[i] + UINT32_C(1); - array->array = array_zone; - rb->high_low_container.containers[i] = array; - array_zone += counts[i] + UINT32_C(1); - break; - } - default: - ndpi_free(arena); - return NULL; - } - } - - return rb; -} - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { -#endif -/* end file src/roaring.c */ /* begin file src/array_util.c */ #include <assert.h> #include <stdbool.h> @@ -11624,7 +7401,7 @@ static const uint8_t shuffle_mask16[] = { * Optimized by D. Lemire on May 3rd 2013 */ CROARING_TARGET_AVX2 -static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b, uint16_t *C) { size_t count = 0; @@ -11641,7 +7418,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, v_b, vectorlength, v_a, vectorlength, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); const int r = _mm_extract_epi32(res_v, 0); - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); + __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r); __m128i p = _mm_shuffle_epi8(v_a, sm16); _mm_storeu_si128((__m128i *)&C[count], p); // can overflow count += _mm_popcnt_u32(r); @@ -11665,7 +7442,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); const int r = _mm_extract_epi32(res_v, 0); __m128i sm16 = - _mm_load_si128((const __m128i *)shuffle_mask16 + r); + _mm_loadu_si128((const __m128i *)shuffle_mask16 + r); __m128i p = _mm_shuffle_epi8(v_a, sm16); _mm_storeu_si128((__m128i *)&C[count], p); // can overflow count += _mm_popcnt_u32(r); @@ -11703,7 +7480,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, CROARING_UNTARGET_REGION CROARING_TARGET_AVX2 -static int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b) { @@ -11779,7 +7556,7 @@ CROARING_TARGET_AVX2 // Warning: // This function may not be safe if A == C or B == C. 
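Referring back to the FROZEN SERIALIZATION FORMAT DESCRIPTION above: the trailing 4-byte header is a bit union of the 15-bit FROZEN_COOKIE and the 17-bit container count, which the removed serialize and view code builds and checks with a shift and a 0x7FFF mask. The sketch below shows only that packing; the cookie value is passed in as a stand-in because FROZEN_COOKIE is defined outside this hunk, and all other names are illustrative.

/* frozen_header_sketch.c - illustrative only */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint32_t pack_header(uint32_t cookie, uint32_t num_containers) {
    /* low 15 bits: cookie, high 17 bits: number of containers */
    return (num_containers << 15) | (cookie & 0x7FFF);
}

static int unpack_header(const char *buf, size_t len,
                         uint32_t cookie, uint32_t *num_containers) {
    uint32_t header;
    if (len < 4) return 0;
    memcpy(&header, buf + len - 4, 4);     /* header may be misaligned */
    if ((header & 0x7FFF) != cookie) return 0;
    *num_containers = header >> 15;
    return 1;
}

int main(void) {
    const uint32_t cookie = 0x1234;        /* stand-in 15-bit value */
    char tail[4];
    uint32_t header = pack_header(cookie, 42), n = 0;
    memcpy(tail, &header, 4);
    if (unpack_header(tail, sizeof(tail), cookie, &n))
        printf("containers = %u\n", n);    /* prints 42 */
    return 0;
}

Reading the header through memcpy mirrors the frozen-view code above, which cannot assume the last four bytes of the buffer are 4-byte aligned.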
///////// -static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, const uint16_t *__restrict__ B, size_t s_b, uint16_t *C) { // we handle the degenerate case @@ -11842,7 +7619,7 @@ static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, const int bitmask_belongs_to_difference = _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; /*** next few lines are probably expensive *****/ - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + + __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + bitmask_belongs_to_difference); __m128i p = _mm_shuffle_epi8(v_a, sm16); _mm_storeu_si128((__m128i *)&C[count], p); // can overflow @@ -11877,7 +7654,7 @@ static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); const int bitmask_belongs_to_difference = _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + + __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + bitmask_belongs_to_difference); __m128i p = _mm_shuffle_epi8(v_a, sm16); _mm_storeu_si128((__m128i *)&C[count], p); // can overflow @@ -11983,7 +7760,7 @@ static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, * and binarySearch2. This approach can be slightly superior to a conventional * galloping search in some instances. */ -static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s, +int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, const uint16_t *large, size_t size_l, uint16_t *buffer) { size_t pos = 0, idx_l = 0, idx_s = 0; @@ -11993,10 +7770,10 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s, } int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small_set[idx_s]; - uint16_t target2 = small_set[idx_s + 1]; - uint16_t target3 = small_set[idx_s + 2]; - uint16_t target4 = small_set[idx_s + 3]; + uint16_t target1 = small[idx_s]; + uint16_t target2 = small[idx_s + 1]; + uint16_t target3 = small[idx_s + 2]; + uint16_t target4 = small[idx_s + 3]; binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, target4, &index1, &index2, &index3, &index4); if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { @@ -12015,8 +7792,8 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s, idx_l += index4; } if ((idx_s + 2 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small_set[idx_s]; - uint16_t target2 = small_set[idx_s + 1]; + uint16_t target1 = small[idx_s]; + uint16_t target2 = small[idx_s + 1]; binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, &index2); if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { @@ -12029,7 +7806,7 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s, idx_l += index2; } if ((idx_s < size_s) && (idx_l < size_l)) { - uint16_t val_s = small_set[idx_s]; + uint16_t val_s = small[idx_s]; int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); if (index >= 0) buffer[pos++] = val_s; @@ -12040,7 +7817,7 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s, // TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
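intersect_skewed_uint16 just above intersects a small sorted set against a much larger one by binary-searching the untouched suffix of the large array for each small element, batching four probes at a time via binarySearch4. A simplified one-probe-at-a-time sketch of the same idea, with illustrative names and without the library's batching, might look like this:

/* skewed_intersect_sketch.c - simplified; the library batches four
   binary searches per iteration. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* lowest index i in a[0..n) with a[i] >= key, or n if none */
static size_t lower_bound16(const uint16_t *a, size_t n, uint16_t key) {
    size_t lo = 0, hi = n;
    while (lo < hi) {
        size_t mid = lo + (hi - lo) / 2;
        if (a[mid] < key) lo = mid + 1; else hi = mid;
    }
    return lo;
}

/* intersect a small sorted set against a much larger sorted set */
static size_t intersect_skewed(const uint16_t *small_set, size_t ns,
                               const uint16_t *large, size_t nl,
                               uint16_t *out) {
    size_t pos = 0, base = 0;
    for (size_t i = 0; i < ns && base < nl; i++) {
        base += lower_bound16(large + base, nl - base, small_set[i]);
        if (base < nl && large[base] == small_set[i])
            out[pos++] = small_set[i];
    }
    return pos;
}

int main(void) {
    const uint16_t s[] = {3, 40, 700};
    const uint16_t l[] = {1, 2, 3, 5, 8, 13, 21, 34, 40, 55,
                          89, 144, 233, 377, 610};
    uint16_t out[3];
    size_t n = intersect_skewed(s, 3, l, 15, out);
    for (size_t i = 0; i < n; i++) printf("%u ", out[i]);
    printf("\n");                          /* prints: 3 40 */
    return 0;
}

Because each search restarts at the previous position, the large array is never rescanned, so the cost is on the order of size_s binary searches rather than a full merge of both arrays.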
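Several hunks above (intersect_vector16, intersect_vector16_cardinality and difference_vector16) change the shuffle-mask lookups from _mm_load_si128 to _mm_loadu_si128. The aligned form requires a 16-byte aligned address while the unaligned form accepts any address; the toy program below, assuming only an x86-64 compiler with the SSE2 headers, illustrates that contract and is not taken from the library.

/* loadu_sketch.c - illustrative only; requires SSE2 (baseline on x86-64) */
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint16_t table[24];
    for (int i = 0; i < 24; i++) table[i] = (uint16_t)i;

    /* table + 1 is only guaranteed 2-byte aligned, so the unaligned
       load is the safe way to read 8 lanes starting there. */
    __m128i v = _mm_loadu_si128((const __m128i *)(table + 1));

    /* _mm_load_si128 on the same address would require 16-byte
       alignment and may fault or invoke undefined behaviour. */

    uint16_t out[8];
    _mm_storeu_si128((__m128i *)out, v);
    printf("%u .. %u\n", out[0], out[7]);  /* prints 1 .. 8 */
    return 0;
}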
-static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set, +int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, size_t size_s, const uint16_t *large, size_t size_l) { @@ -12050,7 +7827,7 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set, return 0; } - uint16_t val_l = large[idx_l], val_s = small_set[idx_s]; + uint16_t val_l = large[idx_l], val_s = small[idx_s]; while (true) { if (val_l < val_s) { @@ -12060,12 +7837,12 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set, } else if (val_s < val_l) { idx_s++; if (idx_s == size_s) break; - val_s = small_set[idx_s]; + val_s = small[idx_s]; } else { pos++; idx_s++; if (idx_s == size_s) break; - val_s = small_set[idx_s]; + val_s = small[idx_s]; idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); if (idx_l == size_l) break; val_l = large[idx_l]; @@ -12075,7 +7852,7 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set, return (int32_t)pos; } -bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s, +bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s, const uint16_t *large, size_t size_l) { size_t idx_l = 0, idx_s = 0; @@ -12083,7 +7860,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s, return false; } - uint16_t val_l = large[idx_l], val_s = small_set[idx_s]; + uint16_t val_l = large[idx_l], val_s = small[idx_s]; while (true) { if (val_l < val_s) { @@ -12093,7 +7870,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s, } else if (val_s < val_l) { idx_s++; if (idx_s == size_s) break; - val_s = small_set[idx_s]; + val_s = small[idx_s]; } else { return true; } @@ -12105,7 +7882,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s, /** * Generic intersection function. */ -static int32_t intersect_uint16(const uint16_t *A, const size_t lenA, +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB, uint16_t *out) { const uint16_t *initout = out; if (lenA == 0 || lenB == 0) return 0; @@ -12130,7 +7907,7 @@ static int32_t intersect_uint16(const uint16_t *A, const size_t lenA, return (int32_t)(out - initout); // NOTREACHED } -static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB) { int32_t answer = 0; if (lenA == 0 || lenB == 0) return 0; @@ -12156,7 +7933,7 @@ static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA } -static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, const uint16_t *B, const size_t lenB) { if (lenA == 0 || lenB == 0) return 0; const uint16_t *endA = A + lenA; @@ -12184,7 +7961,7 @@ static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, /** * Generic intersection function. 
*/ -static size_t intersection_uint32(const uint32_t *A, const size_t lenA, +size_t intersection_uint32(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t *out) { const uint32_t *initout = out; @@ -12210,7 +7987,7 @@ static size_t intersection_uint32(const uint32_t *A, const size_t lenA, return (out - initout); // NOTREACHED } -static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB) { if (lenA == 0 || lenB == 0) return 0; size_t card = 0; @@ -12238,7 +8015,7 @@ static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, // can one vectorize the computation of the union? (Update: Yes! See // union_vector16). -static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, size_t size_2, uint16_t *buffer) { size_t pos = 0, idx_1 = 0, idx_2 = 0; @@ -12287,7 +8064,7 @@ static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t return pos; } -static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, int length2, uint16_t *a_out) { int out_card = 0; int k1 = 0, k2 = 0; @@ -12332,7 +8109,7 @@ static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2 return out_card; } -static int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, +int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, const uint16_t *array_2, int32_t card_2, uint16_t *out) { int32_t pos1 = 0, pos2 = 0, pos_out = 0; while (pos1 < card_1 && pos2 < card_2) { @@ -12770,7 +8547,7 @@ CROARING_UNTARGET_REGION // could be avoided? static inline uint32_t unique(uint16_t *out, uint32_t len) { uint32_t pos = 1; - uint32_t i; for (i = 1; i < len; ++i) { + for (uint32_t i = 1; i < len; ++i) { if (out[i] != out[i - 1]) { out[pos++] = out[i]; } @@ -12786,7 +8563,7 @@ static int uint16_compare(const void *a, const void *b) { CROARING_TARGET_AVX2 // a one-pass SSE union algorithm // This function may not be safe if array1 == output or array2 == output. -static uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, const uint16_t *__restrict__ array2, uint32_t length2, uint16_t *__restrict__ output) { if ((length1 < 8) || (length2 < 8)) { @@ -12900,7 +8677,7 @@ CROARING_UNTARGET_REGION // could be avoided? 
Warning: assumes len > 0 static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { uint32_t pos = 1; - uint32_t i; for (i = 1; i < len; ++i) { + for (uint32_t i = 1; i < len; ++i) { if (out[i] != out[i - 1]) { out[pos++] = out[i]; } else @@ -12910,7 +8687,7 @@ static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { } CROARING_TARGET_AVX2 // a one-pass SSE xor algorithm -static uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, const uint16_t *__restrict__ array2, uint32_t length2, uint16_t *__restrict__ output) { if ((length1 < 8) || (length2 < 8)) { @@ -13019,7 +8796,7 @@ CROARING_UNTARGET_REGION #endif // CROARING_IS_X64 -static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, size_t size_2, uint32_t *buffer) { size_t pos = 0, idx_1 = 0, idx_2 = 0; @@ -13068,7 +8845,7 @@ static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t return pos; } -static size_t union_uint32_card(const uint32_t *set_1, size_t size_1, +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, size_t size_2) { size_t pos = 0, idx_1 = 0, idx_2 = 0; @@ -13114,7 +8891,7 @@ static size_t union_uint32_card(const uint32_t *set_1, size_t size_1, -static size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, size_t size_2, uint16_t *buffer) { #ifdef CROARING_IS_X64 if( croaring_avx2() ) { @@ -13168,8 +8945,9 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) { } while (ptr1 < end8) { - uint64_t v1 = *((const uint64_t*)ptr1); - uint64_t v2 = *((const uint64_t*)ptr2); + uint64_t v1, v2; + memcpy(&v1,ptr1,sizeof(uint64_t)); + memcpy(&v2,ptr2,sizeof(uint64_t)); if (v1 != v2) { return false; } @@ -13190,7 +8968,7 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) { CROARING_UNTARGET_REGION #endif -static bool memequals(const void *s1, const void *s2, size_t n) { +bool memequals(const void *s1, const void *s2, size_t n) { if (n == 0) { return true; } @@ -13209,6 +8987,1017 @@ static bool memequals(const void *s1, const void *s2, size_t n) { } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/array_util.c */ +/* begin file src/bitset_util.c */ +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +#ifdef CROARING_IS_X64 +static uint8_t lengthTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 
7, 5, 6, 6, 7, 6, 7, 7, 8}; +#endif + +#ifdef CROARING_IS_X64 +ALIGNED(32) +static uint32_t vecDecodeTable[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ + {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ + {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 
(01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ + {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ + {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B 
(10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ + {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 
(11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ + {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ + {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif // #ifdef CROARING_IS_X64 + +#ifdef CROARING_IS_X64 +// same as vecDecodeTable but in 16 bits +ALIGNED(32) +static uint16_t vecDecodeTable_uint16[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ + {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 
0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ + {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 
0, 0}, /* 0x5D (01011101) */ + {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ + {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 
0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ + {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ + {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, 
/* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ + {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif + +#ifdef CROARING_IS_X64 +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, + uint32_t *out, size_t outcapacity, + uint32_t base) { + uint32_t *initout = out; + __m256i baseVec = _mm256_set1_epi32(base - 1); + __m256i incVec = _mm256_set1_epi32(64); + __m256i add8 = _mm256_set1_epi32(8); + uint32_t *safeout = out + outcapacity; + size_t i = 0; + for (; (i < length) && (out + 64 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm256_add_epi32(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m256i vecA = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); + __m256i vecB = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm256_add_epi32(baseVec, vecA); + baseVec = _mm256_add_epi32(baseVec, add8); + vecB = _mm256_add_epi32(baseVec, vecB); + baseVec = _mm256_add_epi32(baseVec, add8); + _mm256_storeu_si256((__m256i *)out, vecA); + out += advanceA; + _mm256_storeu_si256((__m256i *)out, vecB); + out += advanceB; + } + } + } + base += i * 64; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) + int r = __builtin_ctzll(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w ^= t; + } + base += 64; + } + return out - initout; +} +CROARING_UNTARGET_REGION +#endif // CROARING_IS_X64 + +size_t bitset_extract_setbits(const uint64_t *words, size_t length, + uint32_t *out, uint32_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) + int r = __builtin_ctzll(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + w ^= t; + } + base += 64; + } + return outpos; +} + +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, + const uint64_t * __restrict__ words2, + size_t length, uint16_t *out, + uint16_t base) { + int outpos = 0; + for (size_t i = 
0; i < length; ++i) { + uint64_t w = words1[i] & words2[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + out[outpos++] = r + base; + w ^= t; + } + base += 64; + } + return outpos; +} + +#ifdef CROARING_IS_X64 +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function uses SSE decoding. + */ +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, + uint16_t *out, size_t outcapacity, + uint16_t base) { + uint16_t *initout = out; + __m128i baseVec = _mm_set1_epi16(base - 1); + __m128i incVec = _mm_set1_epi16(64); + __m128i add8 = _mm_set1_epi16(8); + uint16_t *safeout = out + outcapacity; + const int numberofbytes = 2; // process two bytes at a time + size_t i = 0; + for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm_add_epi16(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m128i vecA = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteA]); + __m128i vecB = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm_add_epi16(baseVec, vecA); + baseVec = _mm_add_epi16(baseVec, add8); + vecB = _mm_add_epi16(baseVec, vecB); + baseVec = _mm_add_epi16(baseVec, add8); + _mm_storeu_si128((__m128i *)out, vecA); + out += advanceA; + _mm_storeu_si128((__m128i *)out, vecB); + out += advanceB; + } + } + } + base += (uint16_t)(i * 64); + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + *out = r + base; + out++; + w ^= t; + } + base += 64; + } + return out - initout; +} +CROARING_UNTARGET_REGION +#endif + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" (can be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. 
+ */ +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, + uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + out[outpos++] = r + base; + w ^= t; + } + base += 64; + } + return outpos; +} + +#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) + +static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // TODO: could unroll for performance, see bitset_set_list + // bts is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $-1, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)); + return card; +} + +static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t pos; + const uint16_t *end = list + length; + + uint64_t shift = 6; + uint64_t offset; + uint64_t load; + for (; list + 3 < end; list += 4) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[1]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[2]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[3]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + } + + while (list != end) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + list++; + } +} + +static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // btr is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "btr %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $0, %[card]\n" + "add $2, 
%[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) + : + /* clobbers */ "memory"); + return card; +} + +static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} + +static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} + +static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + words[offset] = newload; + list++; + } +} + +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + if( croaring_avx2() ) { + return _asm_bitset_clear_list(words, card, list, length); + } else { + return _scalar_bitset_clear_list(words, card, list, length); + } +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + if( croaring_avx2() ) { + return _asm_bitset_set_list_withcard(words, card, list, length); + } else { + return _scalar_bitset_set_list_withcard(words, card, list, length); + } +} + +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + if( croaring_avx2() ) { + _asm_bitset_set_list(words, list, length); + } else { + _scalar_bitset_set_list(words, list, length); + } +} +#else +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} + +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | 
(UINT64_C(1) << index); + words[offset] = newload; + list++; + } +} + +#endif + +/* flip specified bits */ +/* TODO: consider whether worthwhile to make an asm version */ + +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + // todo: is a branch here all that bad? + card += + (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 + words[offset] = newload; + list++; + } + return card; +} + +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + words[offset] = newload; + list++; + } +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/bitset_util.c */ /* begin file src/containers/array.c */ /* * array.c @@ -13233,25 +10022,24 @@ extern inline bool array_container_contains(const array_container_t *arr, uint16_t pos); extern inline int array_container_cardinality(const array_container_t *array); extern inline bool array_container_nonzero_cardinality(const array_container_t *array); -extern inline void array_container_clear(array_container_t *array); extern inline int32_t array_container_serialized_size_in_bytes(int32_t card); extern inline bool array_container_empty(const array_container_t *array); extern inline bool array_container_full(const array_container_t *array); /* Create a new array with capacity size. Return NULL in case of failure. */ -static array_container_t *array_container_create_given_capacity(int32_t size) { +array_container_t *array_container_create_given_capacity(int32_t size) { array_container_t *container; - if ((container = (array_container_t *)ndpi_malloc(sizeof(array_container_t))) == + if ((container = (array_container_t *)roaring_malloc(sizeof(array_container_t))) == NULL) { return NULL; } if( size <= 0 ) { // we don't want to rely on malloc(0) container->array = NULL; - } else if ((container->array = (uint16_t *)ndpi_malloc(sizeof(uint16_t) * size)) == + } else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) * size)) == NULL) { - ndpi_free(container); + roaring_free(container); return NULL; } @@ -13262,23 +10050,23 @@ static array_container_t *array_container_create_given_capacity(int32_t size) { } /* Create a new array. Return NULL in case of failure. */ -static array_container_t *array_container_create() { +array_container_t *array_container_create() { return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); } /* Create a new array containing all values in [min,max). 
*/ -static array_container_t * array_container_create_range(uint32_t min, uint32_t max) { +array_container_t * array_container_create_range(uint32_t min, uint32_t max) { array_container_t * answer = array_container_create_given_capacity(max - min + 1); if(answer == NULL) return answer; answer->cardinality = 0; - uint32_t k; for(k = min; k < max; k++) { + for(uint32_t k = min; k < max; k++) { answer->array[answer->cardinality++] = k; } return answer; } /* Duplicate container */ -static array_container_t *array_container_clone(const array_container_t *src) { +array_container_t *array_container_clone(const array_container_t *src) { array_container_t *newcontainer = array_container_create_given_capacity(src->capacity); if (newcontainer == NULL) return NULL; @@ -13291,30 +10079,56 @@ static array_container_t *array_container_clone(const array_container_t *src) { return newcontainer; } -static int array_container_shrink_to_fit(array_container_t *src) { +void array_container_offset(const array_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset) { + array_container_t *lo = NULL, *hi = NULL; + int top, lo_cap, hi_cap; + + top = (1 << 16) - offset; + + lo_cap = count_less(c->array, c->cardinality, top); + if (loc && lo_cap) { + lo = array_container_create_given_capacity(lo_cap); + for (int i = 0; i < lo_cap; ++i) { + array_container_add(lo, c->array[i] + offset); + } + *loc = (container_t*)lo; + } + + hi_cap = c->cardinality - lo_cap; + if (hic && hi_cap) { + hi = array_container_create_given_capacity(hi_cap); + for (int i = lo_cap; i < c->cardinality; ++i) { + array_container_add(hi, c->array[i] + offset); + } + *hic = (container_t*)hi; + } +} + +int array_container_shrink_to_fit(array_container_t *src) { if (src->cardinality == src->capacity) return 0; // nothing to do - int old_capacity = src->capacity; int savings = src->capacity - src->cardinality; src->capacity = src->cardinality; if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs - ndpi_free(src->array); + roaring_free(src->array); src->array = NULL; } else { uint16_t *oldarray = src->array; src->array = - (uint16_t *)ndpi_realloc(oldarray, old_capacity * sizeof(uint16_t), src->capacity * sizeof(uint16_t)); - if (src->array == NULL) ndpi_free(oldarray); // should never happen? + (uint16_t *)roaring_realloc(oldarray, src->capacity * sizeof(uint16_t)); + if (src->array == NULL) roaring_free(oldarray); // should never happen? } return savings; } /* Free memory. */ -static void array_container_free(array_container_t *arr) { +void array_container_free(array_container_t *arr) { if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise - ndpi_free(arr->array); + roaring_free(arr->array); arr->array = NULL; // pedantic } - ndpi_free(arr); + roaring_free(arr); } static inline int32_t grow_capacity(int32_t capacity) { @@ -13328,26 +10142,25 @@ static inline int32_t clamp(int32_t val, int32_t min, int32_t max) { return ((val < min) ? min : (val > max) ? max : val); } -static void array_container_grow(array_container_t *container, int32_t min, +void array_container_grow(array_container_t *container, int32_t min, bool preserve) { int32_t max = (min <= DEFAULT_MAX_SIZE ? 
DEFAULT_MAX_SIZE : 65536); int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); - int32_t old_capacity = container->capacity; container->capacity = new_capacity; uint16_t *array = container->array; if (preserve) { container->array = - (uint16_t *)ndpi_realloc(array, old_capacity * sizeof(uint16_t), new_capacity * sizeof(uint16_t)); - if (container->array == NULL) ndpi_free(array); + (uint16_t *)roaring_realloc(array, new_capacity * sizeof(uint16_t)); + if (container->array == NULL) roaring_free(array); } else { // Jon Strabala reports that some tools complain otherwise if (array != NULL) { - ndpi_free(array); + roaring_free(array); } - container->array = (uint16_t *)ndpi_malloc(new_capacity * sizeof(uint16_t)); + container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t)); } // handle the case where realloc fails @@ -13358,7 +10171,7 @@ static void array_container_grow(array_container_t *container, int32_t min, } /* Copy one container into another. We assume that they are distinct. */ -static void array_container_copy(const array_container_t *src, +void array_container_copy(const array_container_t *src, array_container_t *dst) { const int32_t cardinality = src->cardinality; if (cardinality > dst->capacity) { @@ -13369,9 +10182,9 @@ static void array_container_copy(const array_container_t *src, memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); } -static void array_container_add_from_range(array_container_t *arr, uint32_t min, +void array_container_add_from_range(array_container_t *arr, uint32_t min, uint32_t max, uint16_t step) { - uint32_t value; for (value = min; value < max; value += step) { + for (uint32_t value = min; value < max; value += step) { array_container_append(arr, value); } } @@ -13379,7 +10192,7 @@ static void array_container_add_from_range(array_container_t *arr, uint32_t min, /* Computes the union of array1 and array2 and write the result to arrayout. * It is assumed that arrayout is distinct from both array1 and array2. */ -static void array_container_union(const array_container_t *array_1, +void array_container_union(const array_container_t *array_1, const array_container_t *array_2, array_container_t *out) { const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; @@ -13397,7 +10210,7 @@ static void array_container_union(const array_container_t *array_1, * to array out. * Array out does not need to be distinct from array_1 */ -static void array_container_andnot(const array_container_t *array_1, +void array_container_andnot(const array_container_t *array_1, const array_container_t *array_2, array_container_t *out) { if (out->capacity < array_1->cardinality) @@ -13424,7 +10237,7 @@ static void array_container_andnot(const array_container_t *array_1, * to arrayout. * It is assumed that arrayout is distinct from both array1 and array2. */ -static void array_container_xor(const array_container_t *array_1, +void array_container_xor(const array_container_t *array_1, const array_container_t *array_2, array_container_t *out) { const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; @@ -13458,7 +10271,7 @@ static inline int32_t minimum_int32(int32_t a, int32_t b) { * arrayout. * It is assumed that arrayout is distinct from both array1 and array2. 
* */ -static void array_container_intersection(const array_container_t *array1, +void array_container_intersection(const array_container_t *array1, const array_container_t *array2, array_container_t *out) { int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, @@ -13499,7 +10312,7 @@ static void array_container_intersection(const array_container_t *array1, /* computes the size of the intersection of array1 and array2 * */ -static int array_container_intersection_cardinality(const array_container_t *array1, +int array_container_intersection_cardinality(const array_container_t *array1, const array_container_t *array2) { int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; const int threshold = 64; // subject to tuning @@ -13525,7 +10338,7 @@ static int array_container_intersection_cardinality(const array_container_t *arr } } -static bool array_container_intersect(const array_container_t *array1, +bool array_container_intersect(const array_container_t *array1, const array_container_t *array2) { int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; const int threshold = 64; // subject to tuning @@ -13545,7 +10358,7 @@ static bool array_container_intersect(const array_container_t *array1, /* computes the intersection of array1 and array2 and write the result to * array1. * */ -static void array_container_intersection_inplace(array_container_t *src_1, +void array_container_intersection_inplace(array_container_t *src_1, const array_container_t *src_2) { // todo: can any of this be vectorized? int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; @@ -13562,11 +10375,12 @@ static void array_container_intersection_inplace(array_container_t *src_1, } } -static int array_container_to_uint32_array(void *vout, const array_container_t *cont, +ALLOW_UNALIGNED +int array_container_to_uint32_array(void *vout, const array_container_t *cont, uint32_t base) { int outpos = 0; uint32_t *out = (uint32_t *)vout; - int i = 0; for (i = 0; i < cont->cardinality; ++i) { + for (int i = 0; i < cont->cardinality; ++i) { const uint32_t val = base + cont->array[i]; memcpy(out + outpos, &val, sizeof(uint32_t)); // should be compiled as a MOV on x64 @@ -13575,37 +10389,36 @@ static int array_container_to_uint32_array(void *vout, const array_container_t * return outpos; } -static void array_container_printf(const array_container_t *v) { +void array_container_printf(const array_container_t *v) { if (v->cardinality == 0) { printf("{}"); return; } printf("{"); printf("%d", v->array[0]); - int i ; for (i = 1; i < v->cardinality; ++i) { + for (int i = 1; i < v->cardinality; ++i) { printf(",%d", v->array[i]); } printf("}"); } -static void array_container_printf_as_uint32_array(const array_container_t *v, +void array_container_printf_as_uint32_array(const array_container_t *v, uint32_t base) { if (v->cardinality == 0) { return; } printf("%u", v->array[0] + base); - int i ; for (i = 1; i < v->cardinality; ++i) { + for (int i = 1; i < v->cardinality; ++i) { printf(",%u", v->array[i] + base); } } /* Compute the number of runs */ -static int32_t array_container_number_of_runs(const array_container_t *ac) { +int32_t array_container_number_of_runs(const array_container_t *ac) { // Can SIMD work here? 
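
/*
 * Standalone sketch of the run-counting pass used by
 * array_container_number_of_runs(): scan the sorted array once and open a
 * new run whenever the current value is not exactly previous + 1.
 * count_runs() and the demo values are illustrative names, not library API.
 */
#include <stdint.h>
#include <stdio.h>

static int32_t count_runs(const uint16_t *a, int32_t n) {
    int32_t runs = 0;
    int32_t prev = -2;                 /* sentinel: never adjacent to a[0] */
    for (int32_t i = 0; i < n; i++) {
        if (a[i] != prev + 1) runs++;  /* gap => a new run starts here */
        prev = a[i];
    }
    return runs;
}

int main(void) {
    const uint16_t a[] = {1, 2, 3, 7, 8, 100};
    printf("%d\n", (int)count_runs(a, 6));  /* {1..3}, {7,8}, {100} -> 3 */
    return 0;
}
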
int32_t nr_runs = 0; int32_t prev = -2; - const uint16_t *p; - for (p = ac->array; p != ac->array + ac->cardinality; ++p) { + for (const uint16_t *p = ac->array; p != ac->array + ac->cardinality; ++p) { if (*p != prev + 1) nr_runs++; prev = *p; } @@ -13618,12 +10431,12 @@ static int32_t array_container_number_of_runs(const array_container_t *ac) { * array_container_size_in_bytes(container). * */ -static int32_t array_container_write(const array_container_t *container, char *buf) { +int32_t array_container_write(const array_container_t *container, char *buf) { memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); return array_container_size_in_bytes(container); } -static bool array_container_is_subset(const array_container_t *container1, +bool array_container_is_subset(const array_container_t *container1, const array_container_t *container2) { if (container1->cardinality > container2->cardinality) { return false; @@ -13646,7 +10459,7 @@ static bool array_container_is_subset(const array_container_t *container1, } } -static int32_t array_container_read(int32_t cardinality, array_container_t *container, +int32_t array_container_read(int32_t cardinality, array_container_t *container, const char *buf) { if (container->capacity < cardinality) { array_container_grow(container, cardinality, false); @@ -13657,17 +10470,17 @@ static int32_t array_container_read(int32_t cardinality, array_container_t *cont return array_container_size_in_bytes(container); } -static bool array_container_iterate(const array_container_t *cont, uint32_t base, +bool array_container_iterate(const array_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { - int i = 0; for (i = 0; i < cont->cardinality; i++) + for (int i = 0; i < cont->cardinality; i++) if (!iterator(cont->array[i] + base, ptr)) return false; return true; } -static bool array_container_iterate64(const array_container_t *cont, uint32_t base, +bool array_container_iterate64(const array_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { - int i = 0; for (i = 0; i < cont->cardinality; i++) + for (int i = 0; i < cont->cardinality; i++) if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) return false; return true; @@ -13677,13 +10490,17 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/array.c */ -/* begin file src/containers/mixed_union.c */ +/* begin file src/containers/bitset.c */ /* - * mixed_union.c + * bitset.c * */ - +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif #include <assert.h> +#include <stdio.h> +#include <stdlib.h> #include <string.h> @@ -13691,1474 +10508,945 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba extern "C" { namespace roaring { namespace internal { #endif -/* Compute the union of src_1 and src_2 and write the result to - * dst. 
*/ -static void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - dst->cardinality = (int32_t)bitset_set_list_withcard( - dst->words, dst->cardinality, src_1->array, src_1->cardinality); +extern inline int bitset_container_cardinality(const bitset_container_t *bitset); +extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos); +// unused at this time: +//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_get(const bitset_container_t *bitset, + uint16_t pos); +extern inline int32_t bitset_container_serialized_size_in_bytes(void); +extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_contains(const bitset_container_t *bitset, + uint16_t pos); + +void bitset_container_clear(bitset_container_t *bitset) { + memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + bitset->cardinality = 0; } -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -static void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_set_list(dst->words, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +void bitset_container_set_all(bitset_container_t *bitset) { + memset(bitset->words, INT64_C(-1), + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + bitset->cardinality = (1 << 16); } -static void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->words, rle.value, rle.length); + + +/* Create a new bitset. Return NULL in case of failure. */ +bitset_container_t *bitset_container_create(void) { + bitset_container_t *bitset = + (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t)); + + if (!bitset) { + return NULL; } - dst->cardinality = bitset_container_compute_cardinality(dst); + // sizeof(__m256i) == 32 + bitset->words = (uint64_t *)roaring_aligned_malloc( + 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + if (!bitset->words) { + roaring_free(bitset); + return NULL; + } + bitset_container_clear(bitset); + return bitset; } -static void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->words, rle.value, rle.length); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +/* Copy one container into another. We assume that they are distinct. 
*/ +void bitset_container_copy(const bitset_container_t *source, + bitset_container_t *dest) { + dest->cardinality = source->cardinality; + memcpy(dest->words, source->words, + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); } -// why do we leave the result as a run container?? -static void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - if (run_container_is_full(src_2)) { - run_container_copy(src_2, dst); - return; - } - // TODO: see whether the "2*" is spurious - run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t previousrle; - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(dst, src_2->runs[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(dst, src_1->array[arraypos]); - arraypos++; - } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; +void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, + uint32_t max, uint16_t step) { + if (step == 0) return; // refuse to crash + if ((64 % step) == 0) { // step divides 64 + uint64_t mask = 0; // construct the repeated mask + for (uint32_t value = (min % step); value < 64; value += step) { + mask |= ((uint64_t)1 << value); } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; + uint32_t firstword = min / 64; + uint32_t endword = (max - 1) / 64; + bitset->cardinality = (max - min + step - 1) / step; + if (firstword == endword) { + bitset->words[firstword] |= + mask & (((~UINT64_C(0)) << (min % 64)) & + ((~UINT64_C(0)) >> ((~max + 1) % 64))); + return; } + bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); + for (uint32_t i = firstword + 1; i < endword; i++) + bitset->words[i] = mask; + bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); } else { - while (rlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; + for (uint32_t value = min; value < max; value += step) { + bitset_container_add(bitset, value); } } } -static void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return; +/* Free memory. */ +void bitset_container_free(bitset_container_t *bitset) { + if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise + roaring_aligned_free(bitset->words); + bitset->words = NULL; // pedantic } - const int32_t maxoutput = src_1->cardinality + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_2->n_runs; - if (src_2->capacity < neededcapacity) - run_container_grow(src_2, neededcapacity, true); - memmove(src_2->runs + maxoutput, src_2->runs, - src_2->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc2 = src_2->runs + maxoutput; - int32_t rlepos = 0; - int32_t arraypos = 0; - int src2nruns = src_2->n_runs; - src_2->n_runs = 0; + roaring_free(bitset); +} - rle16_t previousrle; +/* duplicate container. 
*/ +bitset_container_t *bitset_container_clone(const bitset_container_t *src) { + bitset_container_t *bitset = + (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t)); - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(src_2, src_1->array[arraypos]); - arraypos++; + if (!bitset) { + return NULL; + } + // sizeof(__m256i) == 32 + bitset->words = (uint64_t *)roaring_aligned_malloc( + 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + if (!bitset->words) { + roaring_free(bitset); + return NULL; } + bitset->cardinality = src->cardinality; + memcpy(bitset->words, src->words, + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + return bitset; +} - while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; +void bitset_container_offset(const bitset_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset) { + bitset_container_t *bc = NULL; + uint64_t val; + uint16_t b, i, end; + + b = offset >> 6; + i = offset % 64; + end = 1024 - b; + + if (loc != NULL) { + bc = bitset_container_create(); + if (i == 0) { + memcpy(bc->words+b, c->words, 8*end); } else { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; + bc->words[b] = c->words[0] << i; + for (uint32_t k = 1; k < end; ++k) { + val = c->words[k] << i; + val |= c->words[k-1] >> (64 - i); + bc->words[b+k] = val; + } } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; + + bc->cardinality = bitset_container_compute_cardinality(bc); + if (bc->cardinality != 0) { + *loc = bc; } - } else { - while (rlepos < src2nruns) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; + if (bc->cardinality == c->cardinality) { + return; } } -} -static bool array_array_container_union( - const array_container_t *src_1, const array_container_t *src_2, - container_t **dst -){ - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset + if (hic == NULL) { + // Both hic and loc can't be NULL, so bc is never NULL here + if (bc->cardinality == 0) { + bitset_container_free(bc); + } + return; } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->words, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
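
/*
 * Minimal sketch of the union heuristic in the surrounding (removed) array
 * code: merge two sorted uint16 sets and keep the result as a plain array
 * only while its cardinality stays at or below the array/bitset threshold
 * (DEFAULT_MAX_SIZE, 4096 in CRoaring); larger results live in a 2^16-bit
 * bitset.  merge_unique() and SKETCH_ARRAY_MAX are illustrative names.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_ARRAY_MAX 4096  /* stands in for DEFAULT_MAX_SIZE */

/* Merge two sorted, duplicate-free arrays; returns the union cardinality. */
static int merge_unique(const uint16_t *a, int na,
                        const uint16_t *b, int nb, uint16_t *out) {
    int i = 0, j = 0, k = 0;
    while (i < na && j < nb) {
        if (a[i] < b[j])      out[k++] = a[i++];
        else if (b[j] < a[i]) out[k++] = b[j++];
        else { out[k++] = a[i]; i++; j++; }   /* common value kept once */
    }
    while (i < na) out[k++] = a[i++];
    while (j < nb) out[k++] = b[j++];
    return k;
}

int main(void) {
    const uint16_t a[] = {1, 5, 9}, b[] = {5, 6, 9, 10};
    uint16_t out[16];
    int card = merge_unique(a, 3, b, 4, out);   /* {1,5,6,9,10} -> 5 */
    printf("card=%d -> keep as %s container\n", card,
           card <= SKETCH_ARRAY_MAX ? "array" : "bitset");
    return 0;
}
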
- *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } + + if (bc == NULL || bc->cardinality != 0) { + bc = bitset_container_create(); } - return returnval; -} -static bool array_array_container_inplace_union( - array_container_t *src_1, const array_container_t *src_2, - container_t **dst -){ - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset + if (i == 0) { + memcpy(bc->words, c->words+end, 8*b); + } else { + for (uint32_t k = end; k < 1024; ++k) { + val = c->words[k] << i; + val |= c->words[k-1] >> (64 - i); + bc->words[k-end] = val; } + bc->words[b] = c->words[1023] >> (64 - i); } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->words, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
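
/*
 * The in-place union around this hunk converts its temporary bitset back
 * into the source array with bitset_extract_setbits_uint16(); this is a
 * standalone version of that extraction loop: read the index of the lowest
 * set bit with a count-trailing-zeros builtin, then clear it with
 * w &= w - 1.  extract_u16() is an illustrative name; __builtin_ctzll
 * assumes GCC/Clang.
 */
#include <stdint.h>
#include <stdio.h>

/* Write the positions of all set bits in `length` 64-bit words, offset by
   `base`, into `out`; returns how many positions were written. */
static int extract_u16(const uint64_t *words, int length,
                       uint16_t *out, uint16_t base) {
    int n = 0;
    for (int i = 0; i < length; i++) {
        uint64_t w = words[i];
        while (w != 0) {
            out[n++] = (uint16_t)(base + __builtin_ctzll(w)); /* lowest set bit */
            w &= w - 1;                                       /* clear it */
        }
        base += 64;
    }
    return n;
}

int main(void) {
    uint64_t words[2] = { (1ULL << 3) | (1ULL << 40), 1ULL };
    uint16_t out[8];
    int n = extract_u16(words, 2, out, 0);
    for (int i = 0; i < n; i++) printf("%u ", (unsigned)out[i]);  /* 3 40 64 */
    printf("\n");
    return 0;
}
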
- if(src_1->capacity < ourbitset->cardinality) { - array_container_grow(src_1, ourbitset->cardinality, false); - } - bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS, - src_1->array, 0); - src_1->cardinality = ourbitset->cardinality; - *dst = src_1; - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } + bc->cardinality = bitset_container_compute_cardinality(bc); + if (bc->cardinality == 0) { + bitset_container_free(bc); + return; } - return returnval; + *hic = bc; } - -static bool array_array_container_lazy_union( - const array_container_t *src_1, const array_container_t *src_2, - container_t **dst -){ - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; +void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, + uint32_t end) { + bitset_set_range(bitset->words, begin, end); + bitset->cardinality = + bitset_container_compute_cardinality(bitset); // could be smarter } -static bool array_array_container_lazy_inplace_union( - array_container_t *src_1, const array_container_t *src_2, - container_t **dst -){ - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; +bool bitset_container_intersect(const bitset_container_t *src_1, + const bitset_container_t *src_2) { + // could vectorize, but this is probably already quite fast in practice + const uint64_t * __restrict__ words_1 = src_1->words; + const uint64_t * __restrict__ words_2 = src_2->words; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { + if((words_1[i] & words_2[i]) != 0) return true; } - return returnval; + return false; } -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/mixed_union.c */ -/* begin file src/containers/convert.c */ -#include <stdio.h> - -#ifdef 
__cplusplus -extern "C" { namespace roaring { namespace internal { +#ifdef CROARING_IS_X64 +#ifndef WORDS_IN_AVX2_REG +#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) #endif - -// file contains grubby stuff that must know impl. details of all container -// types. -static bitset_container_t *bitset_container_from_array(const array_container_t *ac) { - bitset_container_t *ans = bitset_container_create(); - int limit = array_container_cardinality(ac); - int i = 0; for (i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]); - return ans; +/* Get the number of bits set (force computation) */ +static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) { + const uint64_t *words = bitset->words; + int32_t sum = 0; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { + sum += hamming(words[i]); + sum += hamming(words[i + 1]); + sum += hamming(words[i + 2]); + sum += hamming(words[i + 3]); + } + return sum; } +/* Get the number of bits set (force computation) */ +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { + if( croaring_avx2() ) { + return (int) avx2_harley_seal_popcount256( + (const __m256i *)bitset->words, + BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); + } else { + return _scalar_bitset_container_compute_cardinality(bitset); -static bitset_container_t *bitset_container_from_run(const run_container_t *arr) { - int card = run_container_cardinality(arr); - bitset_container_t *answer = bitset_container_create(); - int rlepos; for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - rle16_t vl = arr->runs[rlepos]; - bitset_set_lenrange(answer->words, vl.value, vl.length); } - answer->cardinality = card; - return answer; } -static array_container_t *array_container_from_run(const run_container_t *arr) { - array_container_t *answer = - array_container_create_given_capacity(run_container_cardinality(arr)); - answer->cardinality = 0; - int rlepos; for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - int run_start = arr->runs[rlepos].value; - int run_end = run_start + arr->runs[rlepos].length; - - int run_value; for (run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } +#elif defined(USENEON) +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { + uint16x8_t n0 = vdupq_n_u16(0); + uint16x8_t n1 = vdupq_n_u16(0); + uint16x8_t n2 = vdupq_n_u16(0); + uint16x8_t n3 = vdupq_n_u16(0); + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { + uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]); + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); + uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]); + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); + uint64x2_t c2 = vld1q_u64(&bitset->words[i + 4]); + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); + uint64x2_t c3 = vld1q_u64(&bitset->words[i + 6]); + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); } - return answer; -} - -static array_container_t *array_container_from_bitset(const bitset_container_t *bits) { - array_container_t *result = - array_container_create_given_capacity(bits->cardinality); - result->cardinality = bits->cardinality; - // sse version ends up being slower here - // (bitset_extract_setbits_sse_uint16) - // because of the sparsity of the data - bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, - result->array, 0); - return result; -} 
- -/* assumes that container has adequate space. Run from [s,e] (inclusive) */ -static void add_run(run_container_t *rc, int s, int e) { - rc->runs[rc->n_runs].value = s; - rc->runs[rc->n_runs].length = e - s; - rc->n_runs++; + uint64x2_t n = vdupq_n_u64(0); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); + return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); } -static run_container_t *run_container_from_array(const array_container_t *c) { - int32_t n_runs = array_container_number_of_runs(c); - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - int32_t card = c->cardinality; - if (card == 0) return answer; - int i = 0; for (i = 0; i < card; ++i) { - const uint16_t cur_val = c->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c->array[i]; - } - // now prev is the last seen value - add_run(answer, run_start, prev); - // assert(run_container_cardinality(answer) == c->cardinality); - return answer; -} +#else // CROARING_IS_X64 -/** - * Convert the runcontainer to either a Bitmap or an Array Container, depending - * on the cardinality. Frees the container. - * Allocates and returns new container, which caller is responsible for freeing. - * It does not free the run container. - */ -static container_t *convert_to_bitset_or_array_container( - run_container_t *rc, int32_t card, - uint8_t *resulttype -){ - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - int rlepos; for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) { - uint16_t run_start = rc->runs[rlepos].value; - uint16_t run_end = run_start + rc->runs[rlepos].length; - uint16_t run_value; for (run_value = run_start; run_value <= run_end; - ++run_value) { - answer->array[answer->cardinality++] = run_value; - } - } - assert(card == answer->cardinality); - *resulttype = ARRAY_CONTAINER_TYPE; - //run_container_free(r); - return answer; - } - bitset_container_t *answer = bitset_container_create(); - int rlepos; for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) { - uint16_t run_start = rc->runs[rlepos].value; - bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length); +/* Get the number of bits set (force computation) */ +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { + const uint64_t *words = bitset->words; + int32_t sum = 0; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { + sum += hamming(words[i]); + sum += hamming(words[i + 1]); + sum += hamming(words[i + 2]); + sum += hamming(words[i + 3]); } - answer->cardinality = card; - *resulttype = BITSET_CONTAINER_TYPE; - //run_container_free(r); - return answer; + return sum; } -/* Converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes responsible to free the new one. 
*/ -static container_t *convert_run_to_efficient_container( - run_container_t *c, - uint8_t *typecode_after -){ - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(c->n_runs); +#endif // CROARING_IS_X64 - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - int32_t card = run_container_cardinality(c); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); +#ifdef CROARING_IS_X64 - int32_t min_size_non_run = - size_as_bitset_container < size_as_array_container - ? size_as_bitset_container - : size_as_array_container; - if (size_as_run_container <= min_size_non_run) { // no conversion - *typecode_after = RUN_CONTAINER_TYPE; - return c; - } - if (card <= DEFAULT_MAX_SIZE) { - // to array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - int rlepos; for (rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int run_start = c->runs[rlepos].value; - int run_end = run_start + c->runs[rlepos].length; +#define BITSET_CONTAINER_FN_REPEAT 8 +#ifndef WORDS_IN_AVX2_REG +#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) +#endif // WORDS_IN_AVX2_REG +#define LOOP_SIZE \ + BITSET_CONTAINER_SIZE_IN_WORDS / \ + ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) - int run_value; for (run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - *typecode_after = ARRAY_CONTAINER_TYPE; - return answer; - } +/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the + result to bitsetout */ +// clang-format off +#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + static inline int _avx2_bitset_container_##opname##_nocard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \ + const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \ + /* not using the blocking optimization for some reason*/ \ + uint8_t *out = (uint8_t *)dst->words; \ + const int innerloop = 8; \ + for (size_t i = 0; \ + i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ + i += innerloop) { \ + __m256i A1, A2, AO; \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)out, AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 32), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 64), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 96), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 128), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 160), 
AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 192), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 224), AO); \ + out += 256; \ + words_1 += 256; \ + words_2 += 256; \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ + } - // else to bitset - bitset_container_t *answer = bitset_container_create(); +#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that updates cardinality*/ \ + static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \ + const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \ + __m256i *out = (__m256i *)dst->words; \ + dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \ + words_2, words_1, out, \ + BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ + return dst->cardinality; \ + } \ - int rlepos; for (rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int start = c->runs[rlepos].value; - int end = start + c->runs[rlepos].length; - bitset_set_range(answer->words, start, end + 1); - } - answer->cardinality = card; - *typecode_after = BITSET_CONTAINER_TYPE; - return answer; -} +#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that just computes the cardinality*/ \ + static inline int _avx2_bitset_container_##opname##_justcard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2) { \ + const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \ + const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \ + return (int)avx2_harley_seal_popcount256_##opname( \ + data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ + } -// like convert_run_to_efficient_container but frees the old result if needed -static container_t *convert_run_to_efficient_container_and_free( - run_container_t *c, - uint8_t *typecode_after -){ - container_t *answer = convert_run_to_efficient_container(c, typecode_after); - if (answer != c) run_container_free(c); - return answer; -} -/* once converted, the original container is disposed here, rather than - in roaring_array -*/ +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION -// TODO: split into run- array- and bitset- subfunctions for sanity; -// a few function calls won't really matter. 
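
/*
 * Roughly what one instantiation of the _nocard macro above expands to,
 * stripped of the 8x unrolling and cardinality bookkeeping: a 256-bit-wide
 * OR over the 1024 words of two bitset containers.  Sketch only -- it
 * assumes <immintrin.h> and an AVX2 build (e.g. -mavx2); in the library the
 * kernels are generated by these macros and selected at run time.
 */
#include <immintrin.h>
#include <stdint.h>

#define SKETCH_WORDS 1024  /* BITSET_CONTAINER_SIZE_IN_WORDS */

static void bitset_or_avx2_sketch(const uint64_t *a, const uint64_t *b,
                                  uint64_t *out) {
    for (int i = 0; i < SKETCH_WORDS; i += 4) {   /* 4 x 64 = 256 bits per step */
        __m256i va = _mm256_loadu_si256((const __m256i *)(a + i));
        __m256i vb = _mm256_loadu_si256((const __m256i *)(b + i));
        _mm256_storeu_si256((__m256i *)(out + i), _mm256_or_si256(va, vb));
    }
    /* a real caller would then mark the destination cardinality as unknown */
}

int main(void) {
    static uint64_t a[SKETCH_WORDS], b[SKETCH_WORDS], out[SKETCH_WORDS];
    a[0] = 0x1; b[0] = 0x2;
    bitset_or_avx2_sketch(a, b, out);
    return out[0] == 0x3 ? 0 : 1;
}
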
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION -static container_t *convert_run_optimize( - container_t *c, uint8_t typecode_original, - uint8_t *typecode_after -){ - if (typecode_original == RUN_CONTAINER_TYPE) { - container_t *newc = convert_run_to_efficient_container( - CAST_run(c), typecode_after); - if (newc != c) { - container_free(c, typecode_original); - } - return newc; - } else if (typecode_original == ARRAY_CONTAINER_TYPE) { - // it might need to be converted to a run container. - array_container_t *c_qua_array = CAST_array(c); - int32_t n_runs = array_container_number_of_runs(c_qua_array); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t card = array_container_cardinality(c_qua_array); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - if (size_as_run_container >= size_as_array_container) { - *typecode_after = ARRAY_CONTAINER_TYPE; - return c; - } - // else convert array to run container - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - assert(card > 0); - int i = 0; for (i = 0; i < card; ++i) { - uint16_t cur_val = c_qua_array->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c_qua_array->array[i]; - } - assert(run_start >= 0); - // now prev is the last seen value - add_run(answer, run_start, prev); - *typecode_after = RUN_CONTAINER_TYPE; - array_container_free(c_qua_array); - return answer; - } else if (typecode_original == - BITSET_CONTAINER_TYPE) { // run conversions on bitset - // does bitset need conversion to run? 
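
/*
 * The conversion code in this hunk keeps whichever of run, array, or bitset
 * serializes smallest.  This sketch restates that size arithmetic with the
 * byte counts implied by the *_serialized_size_in_bytes helpers -- 2 bytes
 * per array entry, 4 bytes per run plus a 2-byte run count, 8192 bytes for
 * a full bitset; treat the exact formulas and tie-breaking as assumptions.
 * smallest_repr() is an illustrative name.
 */
#include <stdint.h>
#include <stdio.h>

static const char *smallest_repr(int32_t card, int32_t n_runs) {
    int32_t as_array  = card * 2;          /* card * sizeof(uint16_t) */
    int32_t as_run    = 2 + n_runs * 4;    /* count + n_runs * sizeof(rle16_t) */
    int32_t as_bitset = 8192;              /* 65536 bits / 8 */
    if (as_run <= as_array && as_run <= as_bitset) return "run";
    if (as_array <= as_bitset) return "array";
    return "bitset";
}

int main(void) {
    printf("%s\n", smallest_repr(60000, 3));    /* dense, few runs  -> run    */
    printf("%s\n", smallest_repr(5000, 5000));  /* 5000 lone values -> bitset */
    printf("%s\n", smallest_repr(100, 100));    /* sparse, small    -> array  */
    return 0;
}
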
- bitset_container_t *c_qua_bitset = CAST_bitset(c); - int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - if (size_as_bitset_container <= size_as_run_container) { - // no conversion needed. - *typecode_after = BITSET_CONTAINER_TYPE; - return c; - } - // bitset to runcontainer (ported from Java RunContainer( - // BitmapContainer bc, int nbrRuns)) - assert(n_runs > 0); // no empty bitmaps - run_container_t *answer = run_container_create_given_capacity(n_runs); +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - int long_ctr = 0; - uint64_t cur_word = c_qua_bitset->words[0]; - while (true) { - while (cur_word == UINT64_C(0) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word = c_qua_bitset->words[++long_ctr]; +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - if (cur_word == UINT64_C(0)) { - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE; - return answer; - } +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - int local_run_start = __builtin_ctzll(cur_word); - int run_start = local_run_start + 64 * long_ctr; - uint64_t cur_word_with_1s = cur_word | (cur_word - 1); +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION) +CROARING_UNTARGET_REGION - int run_end = 0; - while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word_with_1s = c_qua_bitset->words[++long_ctr]; - if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { - run_end = 64 + long_ctr * 64; // exclusive, I guess - add_run(answer, run_start, run_end - 1); - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE; - return answer; - } - int local_run_end = 
__builtin_ctzll(~cur_word_with_1s); - run_end = local_run_end + long_ctr * 64; - add_run(answer, run_start, run_end - 1); - cur_word = cur_word_with_1s & (cur_word_with_1s + 1); - } - return answer; - } else { - assert(false); - __builtin_unreachable(); - return NULL; - } -} +#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \ + neon_intrinsic) \ + static inline int _scalar_bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ + out[i] = word_1; \ + out[i + 1] = word_2; \ + sum += hamming(word_1); \ + sum += hamming(word_2); \ + } \ + dst->cardinality = sum; \ + return dst->cardinality; \ + } \ + static inline int _scalar_bitset_container_##opname##_nocard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ + out[i] = (words_1[i])opsymbol(words_2[i]); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ + } \ + static inline int _scalar_bitset_container_##opname##_justcard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ + sum += hamming(word_1); \ + sum += hamming(word_2); \ + } \ + return sum; \ + } -static container_t *container_from_run_range( - const run_container_t *run, - uint32_t min, uint32_t max, uint8_t *typecode_after -){ - // We expect most of the time to end up with a bitset container - bitset_container_t *bitset = bitset_container_create(); - *typecode_after = BITSET_CONTAINER_TYPE; - int32_t union_cardinality = 0; - int32_t i; for (i = 0; i < run->n_runs; ++i) { - uint32_t rle_min = run->runs[i].value; - uint32_t rle_max = rle_min + run->runs[i].length; - bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min); - union_cardinality += run->runs[i].length + 1; - } - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min); - bitset_set_lenrange(bitset->words, min, max - min); - bitset->cardinality = union_cardinality; - if(bitset->cardinality <= DEFAULT_MAX_SIZE) { - // we need to convert to an array container - array_container_t * array = array_container_from_bitset(bitset); - *typecode_after = ARRAY_CONTAINER_TYPE; - bitset_container_free(bitset); - return array; - } - return bitset; -} +// we duplicate the function because other containers use the "or" term, makes API more consistent +SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) +SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace internal { -#endif -/* end file 
src/containers/convert.c */ -/* begin file src/containers/run.c */ -#include <stdio.h> -#include <stdlib.h> +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) +SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) +SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) +SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) -#ifdef __cplusplus -extern "C" { namespace roaring { namespace internal { -#endif -extern inline uint16_t run_container_minimum(const run_container_t *run); -extern inline uint16_t run_container_maximum(const run_container_t *run); -extern inline int32_t interleavedBinarySearch(const rle16_t *array, - int32_t lenarray, uint16_t ikey); -extern inline bool run_container_contains(const run_container_t *run, - uint16_t pos); -extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); -extern inline bool run_container_is_full(const run_container_t *run); -extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); -extern inline void run_container_clear(run_container_t *run); -extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); -extern inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop); -extern inline int run_container_cardinality(const run_container_t *run); +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ + int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + if ( croaring_avx2() ) { \ + return _avx2_bitset_container_##opname(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + if ( croaring_avx2() ) { \ + return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + if ((croaring_detect_supported_architectures() & CROARING_AVX2) == \ + CROARING_AVX2) { \ + return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \ + } else { \ + return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \ + } \ + } -static bool run_container_add(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return false; // already there - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return false; // already there - if (offset == le + 1) { - // we may need to fuse - if (index + 1 < run->n_runs) { - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index].length = run->runs[index + 1].value + - run->runs[index + 1].length - - run->runs[index].value; - recoverRoomAtIndex(run, (uint16_t)(index + 1)); - return true; - } - } - run->runs[index].length++; - return true; - } - if (index + 1 < run->n_runs) { - // we may need to fuse - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - 
run->runs[index + 1].value = pos; - run->runs[index + 1].length = run->runs[index + 1].length + 1; - return true; - } - } - } - if (index == -1) { - // we may need to extend the first run - if (0 < run->n_runs) { - if (run->runs[0].value == pos + 1) { - run->runs[0].length++; - run->runs[0].value--; - return true; - } - } - } - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = pos; - run->runs[index + 1].length = 0; - return true; -} -/* Create a new run container. Return NULL in case of failure. */ -static run_container_t *run_container_create_given_capacity(int32_t size) { - run_container_t *run; - /* Allocate the run container itself. */ - if ((run = (run_container_t *)ndpi_malloc(sizeof(run_container_t))) == NULL) { - return NULL; - } - if (size <= 0 ) { // we don't want to rely on malloc(0) - run->runs = NULL; - } else if ((run->runs = (rle16_t *)ndpi_malloc(sizeof(rle16_t) * size)) == NULL) { - ndpi_free(run); - return NULL; - } - run->capacity = size; - run->n_runs = 0; - return run; -} +#elif defined(USENEON) -static int run_container_shrink_to_fit(run_container_t *src) { - if (src->n_runs == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->n_runs; - int old_capacity = src->capacity; - src->capacity = src->n_runs; - rle16_t *oldruns = src->runs; - src->runs = (rle16_t *)ndpi_realloc(oldruns, old_capacity * sizeof(rle16_t), src->capacity * sizeof(rle16_t)); - if (src->runs == NULL) ndpi_free(oldruns); // should never happen? - return savings; -} -/* Create a new run container. Return NULL in case of failure. */ -static run_container_t *run_container_create(void) { - return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0])); \ + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + vst1q_u64(&out[i + 0], c0); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + vst1q_u64(&out[i + 2], c1); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + vld1q_u64(&words_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + vst1q_u64(&out[i + 4], c2); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + vst1q_u64(&out[i + 6], c3); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t 
*src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0]))); \ + vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2]))); \ + vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + vld1q_u64(&words_2[i + 4]))); \ + vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6]))); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0])); \ + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + vld1q_u64(&words_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ } -static run_container_t *run_container_clone(const run_container_t *src) { - run_container_t *run = run_container_create_given_capacity(src->capacity); - if (run == NULL) return NULL; - run->capacity = src->capacity; - run->n_runs = src->n_runs; - memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); - return run; -} +#else -/* Free memory. 
*/ -static void run_container_free(run_container_t *run) { - if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise - ndpi_free(run->runs); - run->runs = NULL; // pedantic - } - ndpi_free(run); +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ + out[i] = word_1; \ + out[i + 1] = word_2; \ + sum += hamming(word_1); \ + sum += hamming(word_2); \ + } \ + dst->cardinality = sum; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ + out[i] = (words_1[i])opsymbol(words_2[i]); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ + sum += hamming(word_1); \ + sum += hamming(word_2); \ + } \ + return sum; \ } -static void run_container_grow(run_container_t *run, int32_t min, bool copy) { - int32_t newCapacity = - (run->capacity == 0) - ? RUN_DEFAULT_INIT_SIZE - : run->capacity < 64 ? run->capacity * 2 - : run->capacity < 1024 ? 
run->capacity * 3 / 2 - : run->capacity * 5 / 4; - int32_t old_capacity = run->capacity; - if (newCapacity < min) newCapacity = min; - run->capacity = newCapacity; - assert(run->capacity >= min); - if (copy) { - rle16_t *oldruns = run->runs; - run->runs = - (rle16_t *)ndpi_realloc(oldruns, old_capacity * sizeof(rle16_t), run->capacity * sizeof(rle16_t)); - if (run->runs == NULL) ndpi_free(oldruns); - } else { - // Jon Strabala reports that some tools complain otherwise - if (run->runs != NULL) { - ndpi_free(run->runs); - } - run->runs = (rle16_t *)ndpi_malloc(run->capacity * sizeof(rle16_t)); - } - // handle the case where realloc fails - if (run->runs == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(run->runs != NULL); -} +#endif // CROARING_IS_X64 -/* copy one container into another */ -static void run_container_copy(const run_container_t *src, run_container_t *dst) { - const int32_t n_runs = src->n_runs; - if (src->n_runs > dst->capacity) { - run_container_grow(dst, n_runs, false); - } - dst->n_runs = n_runs; - memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); -} +// we duplicate the function because other containers use the "or" term, makes API more consistent +BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) +BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -static void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // TODO: this could be a lot more efficient +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) +BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_1, dst); - return; - } - if (if2) { - run_container_copy(src_2, dst); - return; - } - } - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; +BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) +BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) +// clang-format On - rle16_t previousrle; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(dst, src_1->runs[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { - rle16_t newrl; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = src_1->runs[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(dst, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < src_1->n_runs) { - run_container_append(dst, src_1->runs[rlepos], &previousrle); - rlepos++; - } +ALLOW_UNALIGNED +int bitset_container_to_uint32_array( + uint32_t *out, + const bitset_container_t *bc, + uint32_t base +){ +#ifdef CROARING_IS_X64 + if(( croaring_avx2() ) && (bc->cardinality >= 8192)) // heuristic + 
return (int) bitset_extract_setbits_avx2(bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base); + else + return (int) bitset_extract_setbits(bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS, out, base); +#else + return (int) bitset_extract_setbits(bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS, out, base); +#endif } -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' +/* + * Print this container using printf (useful for debugging). */ -static void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return; - } - if (if2) { - run_container_copy(src_2, src_1); - return; - } - } - // we move the data to the end of the current array - const int32_t maxoutput = src_1->n_runs + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_1->n_runs; - if (src_1->capacity < neededcapacity) - run_container_grow(src_1, neededcapacity, true); - memmove(src_1->runs + maxoutput, src_1->runs, - src_1->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc1 = src_1->runs + maxoutput; - const int32_t input1nruns = src_1->n_runs; - src_1->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { - rle16_t newrl; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = inputsrc1[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(src_1, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(src_1, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < input1nruns) { - run_container_append(src_1, inputsrc1[rlepos], &previousrle); - rlepos++; - } +void bitset_container_printf(const bitset_container_t * v) { + printf("{"); + uint32_t base = 0; + bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + uint64_t w = v->words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(iamfirst) {// predicted to be false + printf("%u",base + r); + iamfirst = false; + } else { + printf(",%u",base + r); + } + w ^= t; + } + base += 64; + } + printf("}"); } -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
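For illustration only (not part of the patch): bitset_container_printf and the iterate helpers above walk the set bits of each 64-bit word with the lowest-set-bit idiom, where w & (~w + 1) isolates the lowest 1-bit, __builtin_ctzll gives its position, and xoring it out advances the loop. A minimal standalone sketch, assuming a GCC/Clang-style compiler for the builtin:

#include <stdint.h>
#include <stdio.h>

/* Print the positions of the set bits of one 64-bit word, mirroring the
 * inner loop of bitset_container_printf above. */
static void print_set_bits(uint64_t w, uint32_t base) {
    while (w != 0) {
        uint64_t t = w & (~w + 1);   /* isolate the lowest set bit */
        int r = __builtin_ctzll(w);  /* its index within the word */
        printf("%u\n", base + (uint32_t)r);
        w ^= t;                      /* clear it and continue */
    }
}

int main(void) {
    print_set_bits(UINT64_C(0x8001000000000005), 0); /* prints 0, 2, 48, 63 */
    return 0;
}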
*/ -static void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // don't bother to convert xor with full range into negation - // since negation is implemented similarly - - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - - int32_t pos1 = 0; - int32_t pos2 = 0; - dst->n_runs = 0; - - while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { - if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } else { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } - } - while (pos1 < src_1->n_runs) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } - while (pos2 < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } +/* + * Print this container using printf as a comma-separated list of 32-bit integers starting at base. + */ +void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { + bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + uint64_t w = v->words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(iamfirst) {// predicted to be false + printf("%u", r + base); + iamfirst = false; + } else { + printf(",%u",r + base); + } + w ^= t; + } + base += 64; + } } -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -static void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_2, dst); - return; - } - if (if2) { - run_container_copy(src_1, dst); - return; - } - } - // TODO: this could be a lot more efficient, could use SIMD optimizations - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - dst->runs[dst->n_runs].value = (uint16_t)lateststart; - dst->runs[dst->n_runs].length = - (uint16_t)(earliestend - lateststart - 1); - dst->n_runs++; - } - } -} -/* Compute the size of the intersection of src_1 and src_2 . */ -static int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return run_container_cardinality(src_2); - } - if (if2) { - return run_container_cardinality(src_1); - } - } - int answer = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } +// TODO: use the fast lower bound, also +int bitset_container_number_of_runs(bitset_container_t *bc) { + int num_runs = 0; + uint64_t next_word = bc->words[0]; - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - answer += earliestend - lateststart; - } - } - return answer; -} + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { + uint64_t word = next_word; + next_word = bc->words[i+1]; + num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); + } -static bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return !run_container_empty(src_2); - } - if (if2) { - return !run_container_empty(src_1); - } - } - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - return true; - } - } - return false; + uint64_t word = next_word; + num_runs += hamming((~word) & (word << 1)); + if((word & 0x8000000000000000ULL) != 0) + num_runs++; + return num_runs; } -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. 
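The new bitset_container_number_of_runs above counts run boundaries with a bit trick: inside a word, a run of 1-bits terminates wherever a set bit is followed by a clear bit, which is exactly popcount((~word) & (word << 1)); a run reaching bit 63 is handled via the next word, or counted separately for the last word. A small single-word sketch of the same idea (illustrative, not from the patch), assuming GCC/Clang builtins:

#include <stdint.h>
#include <stdio.h>

/* Count runs of consecutive 1-bits inside one 64-bit word, using the
 * same expression as bitset_container_number_of_runs above. */
static int runs_in_word(uint64_t w) {
    /* runs ending strictly inside the word, plus one if bit 63 is set */
    return __builtin_popcountll((~w) & (w << 1)) + (int)(w >> 63);
}

int main(void) {
    printf("%d\n", runs_in_word(UINT64_C(0x00000000000000F0))); /* 1 run (bits 4..7) */
    printf("%d\n", runs_in_word(UINT64_C(0xF0000000000000F1))); /* 3 runs */
    return 0;
}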
*/ -static void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // following Java implementation as of June 2016 - - if (dst->capacity < src_1->n_runs + src_2->n_runs) - run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); - - dst->n_runs = 0; - - int rlepos1 = 0; - int rlepos2 = 0; - int32_t start = src_1->runs[rlepos1].value; - int32_t end = start + src_1->runs[rlepos1].length + 1; - int32_t start2 = src_2->runs[rlepos2].value; - int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; - - while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { - if (end <= start2) { - // output the first run - dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } else if (end2 <= start) { - // exit the second run - rlepos2++; - if (rlepos2 < src_2->n_runs) { - start2 = src_2->runs[rlepos2].value; - end2 = start2 + src_2->runs[rlepos2].length + 1; - } - } else { - if (start < start2) { - dst->runs[dst->n_runs++] = - MAKE_RLE16(start, start2 - start - 1); - } - if (end2 < end) { - start = end2; - } else { - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } - } - } - if (rlepos1 < src_1->n_runs) { - dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); - rlepos1++; - if (rlepos1 < src_1->n_runs) { - memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, - sizeof(rle16_t) * (src_1->n_runs - rlepos1)); - dst->n_runs += src_1->n_runs - rlepos1; - } - } -} - -static int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - int i = 0; for (i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - int j; for (j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - } - return outpos; +int32_t bitset_container_write(const bitset_container_t *container, + char *buf) { + memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + return bitset_container_size_in_bytes(container); } -/* - * Print this container using printf (useful for debugging). - */ -static void run_container_printf(const run_container_t *cont) { - int i = 0; for (i = 0; i < cont->n_runs; ++i) { - uint16_t run_start = cont->runs[i].value; - uint16_t le = cont->runs[i].length; - printf("[%d,%d]", run_start, run_start + le); - } -} -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. 
- */ -static void run_container_printf_as_uint32_array(const run_container_t *cont, - uint32_t base) { - if (cont->n_runs == 0) return; - { - uint32_t run_start = base + cont->runs[0].value; - uint16_t le = cont->runs[0].length; - printf("%u", run_start); - uint32_t j; for (j = 1; j <= le; ++j) printf(",%u", run_start + j); - } - int32_t i; for (i = 1; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - uint32_t j; for (j = 0; j <= le; ++j) printf(",%u", run_start + j); - } -} - -static int32_t run_container_write(const run_container_t *container, char *buf) { - memcpy(buf, &container->n_runs, sizeof(uint16_t)); - memcpy(buf + sizeof(uint16_t), container->runs, - container->n_runs * sizeof(rle16_t)); - return run_container_size_in_bytes(container); +int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, + const char *buf) { + container->cardinality = cardinality; + memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + return bitset_container_size_in_bytes(container); } -static int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf) { - (void)cardinality; - memcpy(&container->n_runs, buf, sizeof(uint16_t)); - if (container->n_runs > container->capacity) - run_container_grow(container, container->n_runs, false); - if(container->n_runs > 0) { - memcpy(container->runs, buf + sizeof(uint16_t), - container->n_runs * sizeof(rle16_t)); +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = cont->words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(!iterator(r + base, ptr)) return false; + w ^= t; } - return run_container_size_in_bytes(container); + base += 64; + } + return true; } -static bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - int i = 0; for (i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - int j; for (j = 0; j <= le; ++j) - if (!iterator(run_start + j, ptr)) return false; +bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = cont->words[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; + w ^= t; } - return true; + base += 64; + } + return true; } -static bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - int i = 0; for (i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - int j; for (j = 0; j <= le; ++j) - if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) - return false; - } - return true; +#ifdef CROARING_IS_X64 +CROARING_TARGET_AVX2 +ALLOW_UNALIGNED +static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { + const __m256i *ptr1 = (const __m256i*)container1->words; + const __m256i *ptr2 = (const __m256i*)container2->words; + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { + 
__m256i r1 = _mm256_loadu_si256(ptr1+i); + __m256i r2 = _mm256_loadu_si256(ptr2+i); + int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); + if ((uint32_t)mask != UINT32_MAX) { + return false; + } + } + return true; } +CROARING_UNTARGET_REGION +#endif // CROARING_IS_X64 -static bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2) { - int i1 = 0, i2 = 0; - while (i1 < container1->n_runs && i2 < container2->n_runs) { - int start1 = container1->runs[i1].value; - int stop1 = start1 + container1->runs[i1].length; - int start2 = container2->runs[i2].value; - int stop2 = start2 + container2->runs[i2].length; - if (start1 < start2) { - return false; - } else { // start1 >= start2 - if (stop1 < stop2) { - i1++; - } else if (stop1 == stop2) { - i1++; - i2++; - } else { // stop1 > stop2 - i2++; - } - } +ALLOW_UNALIGNED +bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { + if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { + if(container1->cardinality != container2->cardinality) { + return false; } - if (i1 == container1->n_runs) { - return true; - } else { - return false; + if (container1->cardinality == INT32_C(0x10000)) { + return true; } + } +#ifdef CROARING_IS_X64 + if( croaring_avx2() ) { + return _avx2_bitset_container_equals(container1, container2); + } +#endif + return memcmp(container1->words, + container2->words, + BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; } -// TODO: write smart_append_exclusive version to match the overloaded 1 param -// Java version (or is it even used?) - -// follows the Java implementation closely -// length is the rle-value. Ie, run [10,12) uses a length value 1. -static void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length) { - int old_end; - rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; - rle16_t *appended_last_run = src->runs + src->n_runs; - - if (!src->n_runs || - (start > (old_end = last_run->value + last_run->length + 1))) { - *appended_last_run = MAKE_RLE16(start, length); - src->n_runs++; - return; - } - if (old_end == start) { - // we merge - last_run->length += (length + 1); - return; - } - int new_end = start + length + 1; - - if (start == last_run->value) { - // wipe out previous - if (new_end < old_end) { - *last_run = MAKE_RLE16(new_end, old_end - new_end - 1); - return; - } else if (new_end > old_end) { - *last_run = MAKE_RLE16(old_end, new_end - old_end - 1); - return; - } else { - src->n_runs--; - return; +bool bitset_container_is_subset(const bitset_container_t *container1, + const bitset_container_t *container2) { + if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { + if(container1->cardinality > container2->cardinality) { + return false; } } - last_run->length = start - last_run->value - 1; - if (new_end < old_end) { - *appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1); - src->n_runs++; - } else if (new_end > old_end) { - *appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1); - src->n_runs++; - } + for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + if((container1->words[i] & container2->words[i]) != container1->words[i]) { + return false; + } + } + return true; } -static bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - int i = 0; for (i = 0; i < container->n_runs; i++) { - uint16_t length = container->runs[i].length; - if (rank <= *start_rank + length) { - uint16_t value = container->runs[i].value; - *element = value + rank - (*start_rank); - return true; - } else - *start_rank += length + 1; +bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { + int card = bitset_container_cardinality(container); + if(rank >= *start_rank + card) { + *start_rank += card; + return false; } - return false; -} - -static int run_container_rank(const run_container_t *container, uint16_t x) { - int sum = 0; - uint32_t x32 = x; - int i = 0; for (i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - startpoint) + 1; - } else { - sum += length + 1; + const uint64_t *words = container->words; + int32_t size; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { + size = hamming(words[i]); + if(rank <= *start_rank + size) { + uint64_t w = container->words[i]; + uint16_t base = i*64; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(*start_rank == rank) { + *element = r+base; + return true; + } + w ^= t; + *start_rank += 1; + } } + else + *start_rank += size; } - return sum; + assert(false); + __builtin_unreachable(); } -#ifdef CROARING_IS_X64 -CROARING_TARGET_AVX2 -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int _avx2_run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. 
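The cardinality comment above states the run-container formula: an rle16_t pair {value, length} covers the length + 1 values value .. value + length, so the cardinality is n_runs plus the sum of the lengths, which is what both the AVX2 and scalar paths compute. A tiny standalone sketch (rle16_example_t is a stand-in for the library's rle16_t, not part of the patch):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint16_t value; uint16_t length; } rle16_example_t;

/* cardinality = n_runs + sum of lengths; starting the sum at n_runs
 * accounts for the "+1" of every run up front. */
static int run_cardinality(const rle16_example_t *runs, int n_runs) {
    int sum = n_runs;
    for (int k = 0; k < n_runs; ++k)
        sum += runs[k].length;
    return sum;
}

int main(void) {
    rle16_example_t runs[] = { {10, 2}, {100, 0} }; /* {10,11,12} and {100} */
    printf("%d\n", run_cardinality(runs, 2));        /* prints 4 */
    return 0;
}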
*/ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m256i) / sizeof(rle16_t); - if (n_runs > step) { - __m256i total = _mm256_setzero_si256(); - for (; k + step <= n_runs; k += step) { - __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); - __m256i justlengths = _mm256_srli_epi32(ymm1, 16); - total = _mm256_add_epi32(total, justlengths); - } - // a store might be faster than extract? - uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, total); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - } - for (; k < n_runs; ++k) { - sum += runs[k].length; +/* Returns the smallest value (assumes not empty) */ +uint16_t bitset_container_minimum(const bitset_container_t *container) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = container->words[i]; + if (w != 0) { + int r = __builtin_ctzll(w); + return r + i * 64; } - - return sum; + } + return UINT16_MAX; } -CROARING_UNTARGET_REGION - -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int _scalar_run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; +/* Returns the largest value (assumes not empty) */ +uint16_t bitset_container_maximum(const bitset_container_t *container) { + for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { + uint64_t w = container->words[i]; + if (w != 0) { + int r = __builtin_clzll(w); + return i * 64 + 63 - r; } - - return sum; + } + return 0; } -static int run_container_cardinality(const run_container_t *run) { - if( croaring_avx2() ) { - return _avx2_run_container_cardinality(run); - } else { - return _scalar_run_container_cardinality(run); +/* Returns the number of values equal or smaller than x */ +int bitset_container_rank(const bitset_container_t *container, uint16_t x) { + // credit: aqrit + int sum = 0; + int i = 0; + for (int end = x / 64; i < end; i++){ + sum += hamming(container->words[i]); } + uint64_t lastword = container->words[i]; + uint64_t lastpos = UINT64_C(1) << (x % 64); + uint64_t mask = lastpos + lastpos - 1; // smear right + sum += hamming(lastword & mask); + return sum; } -#else - -/* Get the cardinality of `run'. Requires an actual computation. */ -static int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - /* by initializing with n_runs, we omit counting the +1 for each pair. 
*/ - int sum = n_runs; - int k; for (k = 0; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; +/* Returns the index of the first value equal or larger than x, or -1 */ +int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { + uint32_t x32 = x; + uint32_t k = x32 / 64; + uint64_t word = container->words[k]; + const int diff = x32 - k * 64; // in [0,64) + word = (word >> diff) << diff; // a mask is faster, but we don't care + while(word == 0) { + k++; + if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; + word = container->words[k]; + } + return k * 64 + __builtin_ctzll(word); } -#endif - #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif -/* end file src/containers/run.c */ +/* end file src/containers/bitset.c */ /* begin file src/containers/containers.c */ @@ -15195,7 +11483,7 @@ extern inline container_t *container_iandnot( const container_t *c2, uint8_t type2, uint8_t *result_type); -static void container_free(container_t *c, uint8_t type) { +void container_free(container_t *c, uint8_t type) { switch (type) { case BITSET_CONTAINER_TYPE: bitset_container_free(CAST_bitset(c)); @@ -15215,7 +11503,7 @@ static void container_free(container_t *c, uint8_t type) { } } -static void container_printf(const container_t *c, uint8_t type) { +void container_printf(const container_t *c, uint8_t type) { c = container_unwrap_shared(c, &type); switch (type) { case BITSET_CONTAINER_TYPE: @@ -15232,7 +11520,7 @@ static void container_printf(const container_t *c, uint8_t type) { } } -static void container_printf_as_uint32_array( +void container_printf_as_uint32_array( const container_t *c, uint8_t typecode, uint32_t base ){ @@ -15289,7 +11577,7 @@ extern inline container_t *container_xor( const container_t *c2, uint8_t type2, uint8_t *result_type); -static container_t *get_copy_of_container( +container_t *get_copy_of_container( container_t *c, uint8_t *typecode, bool copy_on_write ){ @@ -15302,7 +11590,7 @@ static container_t *get_copy_of_container( } assert(*typecode != SHARED_CONTAINER_TYPE); - if ((shared_container = (shared_container_t *)ndpi_malloc( + if ((shared_container = (shared_container_t *)roaring_malloc( sizeof(shared_container_t))) == NULL) { return NULL; } @@ -15325,7 +11613,7 @@ static container_t *get_copy_of_container( * Copies a container, requires a typecode. This allocates new memory, caller * is responsible for deallocation. */ -static container_t *container_clone(const container_t *c, uint8_t typecode) { +container_t *container_clone(const container_t *c, uint8_t typecode) { // We do not want to allow cloning of shared containers. 
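bitset_container_rank above (credited to aqrit in the patch) counts the values at most x by popcounting the words before x/64 and then masking the final word with a smeared mask covering bits 0 .. x%64. A standalone sketch over a plain word array (illustrative only), assuming GCC/Clang builtins:

#include <stdint.h>
#include <stdio.h>

/* Number of set bits at positions <= x: full words first, then the
 * masked last word, as in bitset_container_rank above. */
static int bitset_rank(const uint64_t *words, uint16_t x) {
    int sum = 0;
    int i = 0;
    for (int end = x / 64; i < end; i++)
        sum += __builtin_popcountll(words[i]);
    uint64_t lastpos = UINT64_C(1) << (x % 64);
    uint64_t mask = lastpos + lastpos - 1;     /* bits 0 .. x%64, inclusive */
    return sum + __builtin_popcountll(words[i] & mask);
}

int main(void) {
    uint64_t words[2] = { UINT64_C(0xF), UINT64_C(0x1) }; /* bits 0..3 and 64 */
    printf("%d\n", bitset_rank(words, 2));   /* prints 3 */
    printf("%d\n", bitset_rank(words, 64));  /* prints 5 */
    return 0;
}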
// c = container_unwrap_shared(c, &typecode); switch (typecode) { @@ -15345,7 +11633,7 @@ static container_t *container_clone(const container_t *c, uint8_t typecode) { } } -static container_t *shared_container_extract_copy( +container_t *shared_container_extract_copy( shared_container_t *sc, uint8_t *typecode ){ assert(sc->counter > 0); @@ -15356,7 +11644,7 @@ static container_t *shared_container_extract_copy( if (sc->counter == 0) { answer = sc->container; sc->container = NULL; // paranoid - ndpi_free(sc); + roaring_free(sc); } else { answer = container_clone(sc->container, *typecode); } @@ -15364,14 +11652,14 @@ static container_t *shared_container_extract_copy( return answer; } -static void shared_container_free(shared_container_t *container) { +void shared_container_free(shared_container_t *container) { assert(container->counter > 0); container->counter--; if (container->counter == 0) { assert(container->typecode != SHARED_CONTAINER_TYPE); container_free(container->container, container->typecode); container->container = NULL; // paranoid - ndpi_free(container); + roaring_free(container); } } @@ -15417,6 +11705,340 @@ extern inline container_t *container_andnot( } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/containers.c */ +/* begin file src/containers/convert.c */ +#include <stdio.h> + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +// file contains grubby stuff that must know impl. details of all container +// types. +bitset_container_t *bitset_container_from_array(const array_container_t *ac) { + bitset_container_t *ans = bitset_container_create(); + int limit = array_container_cardinality(ac); + for (int i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]); + return ans; +} + +bitset_container_t *bitset_container_from_run(const run_container_t *arr) { + int card = run_container_cardinality(arr); + bitset_container_t *answer = bitset_container_create(); + for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { + rle16_t vl = arr->runs[rlepos]; + bitset_set_lenrange(answer->words, vl.value, vl.length); + } + answer->cardinality = card; + return answer; +} + +array_container_t *array_container_from_run(const run_container_t *arr) { + array_container_t *answer = + array_container_create_given_capacity(run_container_cardinality(arr)); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { + int run_start = arr->runs[rlepos].value; + int run_end = run_start + arr->runs[rlepos].length; + + for (int run_value = run_start; run_value <= run_end; ++run_value) { + answer->array[answer->cardinality++] = (uint16_t)run_value; + } + } + return answer; +} + +array_container_t *array_container_from_bitset(const bitset_container_t *bits) { + array_container_t *result = + array_container_create_given_capacity(bits->cardinality); + result->cardinality = bits->cardinality; + // sse version ends up being slower here + // (bitset_extract_setbits_sse_uint16) + // because of the sparsity of the data + bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, + result->array, 0); + return result; +} + +/* assumes that container has adequate space. 
Run from [s,e] (inclusive) */ +static void add_run(run_container_t *rc, int s, int e) { + rc->runs[rc->n_runs].value = s; + rc->runs[rc->n_runs].length = e - s; + rc->n_runs++; +} + +run_container_t *run_container_from_array(const array_container_t *c) { + int32_t n_runs = array_container_number_of_runs(c); + run_container_t *answer = run_container_create_given_capacity(n_runs); + int prev = -2; + int run_start = -1; + int32_t card = c->cardinality; + if (card == 0) return answer; + for (int i = 0; i < card; ++i) { + const uint16_t cur_val = c->array[i]; + if (cur_val != prev + 1) { + // new run starts; flush old one, if any + if (run_start != -1) add_run(answer, run_start, prev); + run_start = cur_val; + } + prev = c->array[i]; + } + // now prev is the last seen value + add_run(answer, run_start, prev); + // assert(run_container_cardinality(answer) == c->cardinality); + return answer; +} + +/** + * Convert the runcontainer to either a Bitmap or an Array Container, depending + * on the cardinality. Frees the container. + * Allocates and returns new container, which caller is responsible for freeing. + * It does not free the run container. + */ +container_t *convert_to_bitset_or_array_container( + run_container_t *rc, int32_t card, + uint8_t *resulttype +){ + if (card <= DEFAULT_MAX_SIZE) { + array_container_t *answer = array_container_create_given_capacity(card); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) { + uint16_t run_start = rc->runs[rlepos].value; + uint16_t run_end = run_start + rc->runs[rlepos].length; + for (uint16_t run_value = run_start; run_value < run_end; + ++run_value) { + answer->array[answer->cardinality++] = run_value; + } + answer->array[answer->cardinality++] = run_end; + } + assert(card == answer->cardinality); + *resulttype = ARRAY_CONTAINER_TYPE; + //run_container_free(r); + return answer; + } + bitset_container_t *answer = bitset_container_create(); + for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) { + uint16_t run_start = rc->runs[rlepos].value; + bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length); + } + answer->cardinality = card; + *resulttype = BITSET_CONTAINER_TYPE; + //run_container_free(r); + return answer; +} + +/* Converts a run container to either an array or a bitset, IF it saves space. + */ +/* If a conversion occurs, the caller is responsible to free the original + * container and + * he becomes responsible to free the new one. */ +container_t *convert_run_to_efficient_container( + run_container_t *c, + uint8_t *typecode_after +){ + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(c->n_runs); + + int32_t size_as_bitset_container = + bitset_container_serialized_size_in_bytes(); + int32_t card = run_container_cardinality(c); + int32_t size_as_array_container = + array_container_serialized_size_in_bytes(card); + + int32_t min_size_non_run = + size_as_bitset_container < size_as_array_container + ? 
size_as_bitset_container + : size_as_array_container; + if (size_as_run_container <= min_size_non_run) { // no conversion + *typecode_after = RUN_CONTAINER_TYPE; + return c; + } + if (card <= DEFAULT_MAX_SIZE) { + // to array + array_container_t *answer = array_container_create_given_capacity(card); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { + int run_start = c->runs[rlepos].value; + int run_end = run_start + c->runs[rlepos].length; + + for (int run_value = run_start; run_value <= run_end; ++run_value) { + answer->array[answer->cardinality++] = (uint16_t)run_value; + } + } + *typecode_after = ARRAY_CONTAINER_TYPE; + return answer; + } + + // else to bitset + bitset_container_t *answer = bitset_container_create(); + + for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { + int start = c->runs[rlepos].value; + int end = start + c->runs[rlepos].length; + bitset_set_range(answer->words, start, end + 1); + } + answer->cardinality = card; + *typecode_after = BITSET_CONTAINER_TYPE; + return answer; +} + +// like convert_run_to_efficient_container but frees the old result if needed +container_t *convert_run_to_efficient_container_and_free( + run_container_t *c, + uint8_t *typecode_after +){ + container_t *answer = convert_run_to_efficient_container(c, typecode_after); + if (answer != c) run_container_free(c); + return answer; +} + +/* once converted, the original container is disposed here, rather than + in roaring_array +*/ + +// TODO: split into run- array- and bitset- subfunctions for sanity; +// a few function calls won't really matter. + +container_t *convert_run_optimize( + container_t *c, uint8_t typecode_original, + uint8_t *typecode_after +){ + if (typecode_original == RUN_CONTAINER_TYPE) { + container_t *newc = convert_run_to_efficient_container( + CAST_run(c), typecode_after); + if (newc != c) { + container_free(c, typecode_original); + } + return newc; + } else if (typecode_original == ARRAY_CONTAINER_TYPE) { + // it might need to be converted to a run container. + array_container_t *c_qua_array = CAST_array(c); + int32_t n_runs = array_container_number_of_runs(c_qua_array); + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(n_runs); + int32_t card = array_container_cardinality(c_qua_array); + int32_t size_as_array_container = + array_container_serialized_size_in_bytes(card); + + if (size_as_run_container >= size_as_array_container) { + *typecode_after = ARRAY_CONTAINER_TYPE; + return c; + } + // else convert array to run container + run_container_t *answer = run_container_create_given_capacity(n_runs); + int prev = -2; + int run_start = -1; + + assert(card > 0); + for (int i = 0; i < card; ++i) { + uint16_t cur_val = c_qua_array->array[i]; + if (cur_val != prev + 1) { + // new run starts; flush old one, if any + if (run_start != -1) add_run(answer, run_start, prev); + run_start = cur_val; + } + prev = c_qua_array->array[i]; + } + assert(run_start >= 0); + // now prev is the last seen value + add_run(answer, run_start, prev); + *typecode_after = RUN_CONTAINER_TYPE; + array_container_free(c_qua_array); + return answer; + } else if (typecode_original == + BITSET_CONTAINER_TYPE) { // run conversions on bitset + // does bitset need conversion to run? 
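convert_run_to_efficient_container above keeps whichever representation serializes smallest. Per the write routines shown in this patch, a bitset container is BITSET_CONTAINER_SIZE_IN_WORDS * 8 bytes (8192 with the usual 1024 words) and a run container is a 16-bit run count plus 4 bytes per rle16_t pair; the 2-bytes-per-value array size and the 4096-entry DEFAULT_MAX_SIZE threshold are assumptions here, since neither appears in this hunk. A rough sketch of the decision (not the library's API):

#include <stdint.h>
#include <stdio.h>

enum { EX_BITSET, EX_ARRAY, EX_RUN };   /* illustrative labels only */

/* Pick the cheapest container for a value set with the given cardinality
 * and number of runs, mirroring the comparison above. The array formula
 * and the 4096 threshold are assumed, not taken from this diff. */
static int best_container(int32_t card, int32_t n_runs) {
    int32_t size_as_run    = 2 + 4 * n_runs;   /* uint16 n_runs + rle16_t pairs */
    int32_t size_as_bitset = 1024 * 8;
    int32_t size_as_array  = 2 * card;
    int32_t min_non_run = size_as_bitset < size_as_array ? size_as_bitset
                                                         : size_as_array;
    if (size_as_run <= min_non_run) return EX_RUN;
    return (card <= 4096) ? EX_ARRAY : EX_BITSET;
}

int main(void) {
    printf("%d\n", best_container(60000, 3));  /* a few long runs: EX_RUN */
    printf("%d\n", best_container(100, 90));   /* sparse and choppy: EX_ARRAY */
    return 0;
}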
+ bitset_container_t *c_qua_bitset = CAST_bitset(c); + int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(n_runs); + int32_t size_as_bitset_container = + bitset_container_serialized_size_in_bytes(); + + if (size_as_bitset_container <= size_as_run_container) { + // no conversion needed. + *typecode_after = BITSET_CONTAINER_TYPE; + return c; + } + // bitset to runcontainer (ported from Java RunContainer( + // BitmapContainer bc, int nbrRuns)) + assert(n_runs > 0); // no empty bitmaps + run_container_t *answer = run_container_create_given_capacity(n_runs); + + int long_ctr = 0; + uint64_t cur_word = c_qua_bitset->words[0]; + while (true) { + while (cur_word == UINT64_C(0) && + long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) + cur_word = c_qua_bitset->words[++long_ctr]; + + if (cur_word == UINT64_C(0)) { + bitset_container_free(c_qua_bitset); + *typecode_after = RUN_CONTAINER_TYPE; + return answer; + } + + int local_run_start = __builtin_ctzll(cur_word); + int run_start = local_run_start + 64 * long_ctr; + uint64_t cur_word_with_1s = cur_word | (cur_word - 1); + + int run_end = 0; + while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && + long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) + cur_word_with_1s = c_qua_bitset->words[++long_ctr]; + + if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { + run_end = 64 + long_ctr * 64; // exclusive, I guess + add_run(answer, run_start, run_end - 1); + bitset_container_free(c_qua_bitset); + *typecode_after = RUN_CONTAINER_TYPE; + return answer; + } + int local_run_end = __builtin_ctzll(~cur_word_with_1s); + run_end = local_run_end + long_ctr * 64; + add_run(answer, run_start, run_end - 1); + cur_word = cur_word_with_1s & (cur_word_with_1s + 1); + } + return answer; + } else { + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +container_t *container_from_run_range( + const run_container_t *run, + uint32_t min, uint32_t max, uint8_t *typecode_after +){ + // We expect most of the time to end up with a bitset container + bitset_container_t *bitset = bitset_container_create(); + *typecode_after = BITSET_CONTAINER_TYPE; + int32_t union_cardinality = 0; + for (int32_t i = 0; i < run->n_runs; ++i) { + uint32_t rle_min = run->runs[i].value; + uint32_t rle_max = rle_min + run->runs[i].length; + bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min); + union_cardinality += run->runs[i].length + 1; + } + union_cardinality += max - min + 1; + union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min); + bitset_set_lenrange(bitset->words, min, max - min); + bitset->cardinality = union_cardinality; + if(bitset->cardinality <= DEFAULT_MAX_SIZE) { + // we need to convert to an array container + array_container_t * array = array_container_from_bitset(bitset); + *typecode_after = ARRAY_CONTAINER_TYPE; + bitset_container_free(bitset); + return array; + } + return bitset; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/convert.c */ /* begin file src/containers/mixed_andnot.c */ /* * mixed_andnot.c. 
More methods since operation is not symmetric, @@ -15433,7 +12055,7 @@ extern "C" { namespace roaring { namespace internal { /* Compute the andnot of src_1 and src_2 and write the result to * dst, a valid array container that could be the same as dst.*/ -static void array_bitset_container_andnot(const array_container_t *src_1, +void array_bitset_container_andnot(const array_container_t *src_1, const bitset_container_t *src_2, array_container_t *dst) { // follows Java implementation as of June 2016 @@ -15442,7 +12064,7 @@ static void array_bitset_container_andnot(const array_container_t *src_1, } int32_t newcard = 0; const int32_t origcard = src_1->cardinality; - int i = 0; for (i = 0; i < origcard; ++i) { + for (int i = 0; i < origcard; ++i) { uint16_t key = src_1->array[i]; dst->array[newcard] = key; newcard += 1 - bitset_container_contains(src_2, key); @@ -15453,7 +12075,7 @@ static void array_bitset_container_andnot(const array_container_t *src_1, /* Compute the andnot of src_1 and src_2 and write the result to * src_1 */ -static void array_bitset_container_iandnot(array_container_t *src_1, +void array_bitset_container_iandnot(array_container_t *src_1, const bitset_container_t *src_2) { array_bitset_container_andnot(src_1, src_2, src_1); } @@ -15463,7 +12085,7 @@ static void array_bitset_container_iandnot(array_container_t *src_1, * Return true for a bitset result; false for array */ -static bool bitset_array_container_andnot( +bool bitset_array_container_andnot( const bitset_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -15491,7 +12113,7 @@ static bool bitset_array_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static bool bitset_array_container_iandnot( +bool bitset_array_container_iandnot( bitset_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -15515,7 +12137,7 @@ static bool bitset_array_container_iandnot( * result true) or an array container. */ -static bool run_bitset_container_andnot( +bool run_bitset_container_andnot( const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -15525,9 +12147,9 @@ static bool run_bitset_container_andnot( // must be an array array_container_t *answer = array_container_create_given_capacity(card); answer->cardinality = 0; - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; - int run_value; for (run_value = rle.value; run_value <= rle.value + rle.length; + for (int run_value = rle.value; run_value <= rle.value + rle.length; ++run_value) { if (!bitset_container_get(src_2, (uint16_t)run_value)) { answer->array[answer->cardinality++] = (uint16_t)run_value; @@ -15541,7 +12163,7 @@ static bool run_bitset_container_andnot( bitset_container_t *answer = bitset_container_clone(src_2); uint32_t last_pos = 0; - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; uint32_t start = rle.value; @@ -15571,7 +12193,7 @@ static bool run_bitset_container_andnot( * result true) or an array container. */ -static bool run_bitset_container_iandnot( +bool run_bitset_container_iandnot( run_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -15588,7 +12210,7 @@ static bool run_bitset_container_iandnot( * result true) or an array container. 
*/ -static bool bitset_run_container_andnot( +bool bitset_run_container_andnot( const bitset_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -15596,7 +12218,7 @@ static bool bitset_run_container_andnot( bitset_container_t *result = bitset_container_create(); bitset_container_copy(src_1, result); - int32_t rlepos; for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { rle16_t rle = src_2->runs[rlepos]; bitset_reset_range(result->words, rle.value, rle.value + rle.length + UINT32_C(1)); @@ -15619,13 +12241,13 @@ static bool bitset_run_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static bool bitset_run_container_iandnot( +bool bitset_run_container_iandnot( bitset_container_t *src_1, const run_container_t *src_2, container_t **dst ){ *dst = src_1; - int32_t rlepos; for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { rle16_t rle = src_2->runs[rlepos]; bitset_reset_range(src_1->words, rle.value, rle.value + rle.length + UINT32_C(1)); @@ -15655,7 +12277,7 @@ static int run_array_array_subtract(const run_container_t *rc, int32_t in_array_pos = -1; // since advanceUntil always assumes we start the search AFTER this - int rlepos; for (rlepos = 0; rlepos < rc->n_runs; rlepos++) { + for (int rlepos = 0; rlepos < rc->n_runs; rlepos++) { int32_t start = rc->runs[rlepos].value; int32_t end = start + rc->runs[rlepos].length + 1; @@ -15663,17 +12285,17 @@ static int run_array_array_subtract(const run_container_t *rc, a_in->cardinality, (uint16_t)start); if (in_array_pos >= a_in->cardinality) { // run has no items subtracted - int32_t i; for (i = start; i < end; ++i) + for (int32_t i = start; i < end; ++i) a_out->array[out_card++] = (uint16_t)i; } else { uint16_t next_nonincluded = a_in->array[in_array_pos]; if (next_nonincluded >= end) { // another case when run goes unaltered - int32_t i; for (i = start; i < end; ++i) + for (int32_t i = start; i < end; ++i) a_out->array[out_card++] = (uint16_t)i; in_array_pos--; // ensure we see this item again if necessary } else { - int32_t i; for (i = start; i < end; ++i) + for (int32_t i = start; i < end; ++i) if (i != next_nonincluded) a_out->array[out_card++] = (uint16_t)i; else // 0 should ensure we don't match @@ -15692,7 +12314,7 @@ static int run_array_array_subtract(const run_container_t *rc, * can become any type of container. */ -static int run_array_container_andnot( +int run_array_container_andnot( const run_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -15786,7 +12408,7 @@ static int run_array_container_andnot( * cases, the caller is responsible for deallocating dst. 
* Returns true iff dst is a bitset */ -static int run_array_container_iandnot( +int run_array_container_iandnot( run_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -15798,7 +12420,7 @@ static int run_array_container_iandnot( /* dst must be a valid array container, allowed to be src_1 */ -static void array_run_container_andnot(const array_container_t *src_1, +void array_run_container_andnot(const array_container_t *src_1, const run_container_t *src_2, array_container_t *dst) { // basically following Java impl as of June 2016 @@ -15818,7 +12440,7 @@ static void array_run_container_andnot(const array_container_t *src_1, uint16_t val = 0; int dest_card = 0; - int i = 0; for (i = 0; i < src_1->cardinality; ++i) { + for (int i = 0; i < src_1->cardinality; ++i) { val = src_1->array[i]; if (val < run_start) dst->array[dest_card++] = val; @@ -15844,7 +12466,7 @@ static void array_run_container_andnot(const array_container_t *src_1, * can become any kind of container. */ -static void array_run_container_iandnot(array_container_t *src_1, +void array_run_container_iandnot(array_container_t *src_1, const run_container_t *src_2) { array_run_container_andnot(src_1, src_2, src_1); } @@ -15853,7 +12475,7 @@ static void array_run_container_iandnot(array_container_t *src_1, * can become any kind of container. */ -static int run_run_container_andnot( +int run_run_container_andnot( const run_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -15871,7 +12493,7 @@ static int run_run_container_andnot( * cases, the caller is responsible for deallocating dst. * Returns true iff dst is a bitset */ -static int run_run_container_iandnot( +int run_run_container_iandnot( run_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -15885,7 +12507,7 @@ static int run_run_container_iandnot( * dst is a valid array container and may be the same as src_1 */ -static void array_array_container_andnot(const array_container_t *src_1, +void array_array_container_andnot(const array_container_t *src_1, const array_container_t *src_2, array_container_t *dst) { array_container_andnot(src_1, src_2, dst); @@ -15893,7 +12515,7 @@ static void array_array_container_andnot(const array_container_t *src_1, /* inplace array-array andnot will always be able to reuse the space of * src_1 */ -static void array_array_container_iandnot(array_container_t *src_1, +void array_array_container_iandnot(array_container_t *src_1, const array_container_t *src_2) { array_container_andnot(src_1, src_2, src_1); } @@ -15903,7 +12525,7 @@ static void array_array_container_iandnot(array_container_t *src_1, * "dst is a bitset" */ -static bool bitset_bitset_container_andnot( +bool bitset_bitset_container_andnot( const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -15926,7 +12548,7 @@ static bool bitset_bitset_container_andnot( * cases, the caller is responsible for deallocating dst. 
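Taken together, these per-container kernels sit behind the public roaring_bitmap_andnot() call. A hedged usage sketch against the public CRoaring API follows; the include path is an assumption and depends on how the amalgamation is vendored (in nDPI it lives under third_party/include/).

#include <stdint.h>
#include <stdio.h>
#include "roaring.h"   /* amalgamated header; adjust the path to your build */

int main(void) {
    roaring_bitmap_t *a = roaring_bitmap_create();
    roaring_bitmap_t *b = roaring_bitmap_create();
    for (uint32_t v = 0; v < 1000; v++)   roaring_bitmap_add(a, v);
    for (uint32_t v = 500; v < 1500; v++) roaring_bitmap_add(b, v);

    roaring_bitmap_t *diff = roaring_bitmap_andnot(a, b);   /* a \ b */
    printf("%llu\n",
           (unsigned long long)roaring_bitmap_get_cardinality(diff)); /* 500 */

    roaring_bitmap_free(diff);
    roaring_bitmap_free(b);
    roaring_bitmap_free(a);
    return 0;
}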
* Returns true iff dst is a bitset */ -static bool bitset_bitset_container_iandnot( +bool bitset_bitset_container_iandnot( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -15945,370 +12567,13 @@ static bool bitset_bitset_container_iandnot( } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/mixed_andnot.c */ -/* begin file src/containers/mixed_negation.c */ -/* - * mixed_negation.c - * - */ - -#include <assert.h> -#include <string.h> - - -#ifdef __cplusplus -extern "C" { namespace roaring { namespace internal { -#endif - -// TODO: make simplified and optimized negation code across -// the full range. - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap -' * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -static void array_container_negation(const array_container_t *src, - bitset_container_t *dst) { - uint64_t card = UINT64_C(1 << 16); - bitset_container_set_all(dst); - - if (src->cardinality == 0) { - return; - } - - dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, - (uint64_t)src->cardinality); -} - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -static bool bitset_container_negation( - const bitset_container_t *src, container_t **dst -){ - return bitset_container_negation_range(src, 0, (1 << 16), dst); -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -static bool bitset_container_negation_inplace( - bitset_container_t *src, container_t **dst -){ - return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -static int run_container_negation(const run_container_t *src, container_t **dst) { - return run_container_negation_range(src, 0, (1 << 16), dst); -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -static int run_container_negation_inplace(run_container_t *src, container_t **dst) { - return run_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. 
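array_container_negation() above complements a sparse array over the full 16-bit range by starting from an all-ones bitset and clearing the listed values (bitset_clear_list() in the library). A simplified sketch with the clearing done by a plain loop:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    static uint64_t words[1024];
    memset(words, 0xFF, sizeof(words));                  /* set all 65536 bits */

    uint16_t present[] = {0, 1, 65535};                  /* the sparse array   */
    for (size_t i = 0; i < 3; ++i)
        words[present[i] >> 6] &= ~(UINT64_C(1) << (present[i] & 63));

    int card = 0;
    for (int i = 0; i < 1024; ++i)
        card += __builtin_popcountll(words[i]);          /* GCC/Clang builtin  */
    printf("%d\n", card);                                /* 65536 - 3 = 65533  */
    return 0;
}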
- */ -static bool array_container_negation_range( - const array_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - /* close port of the Java implementation */ - if (range_start >= range_end) { - *dst = array_container_clone(src); - return false; - } - - int32_t start_index = - binarySearch(src->array, src->cardinality, (uint16_t)range_start); - if (start_index < 0) start_index = -start_index - 1; - - int32_t last_index = - binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); - if (last_index < 0) last_index = -last_index - 2; - - const int32_t current_values_in_range = last_index - start_index + 1; - const int32_t span_to_be_flipped = range_end - range_start; - const int32_t new_values_in_range = - span_to_be_flipped - current_values_in_range; - const int32_t cardinality_change = - new_values_in_range - current_values_in_range; - const int32_t new_cardinality = src->cardinality + cardinality_change; - - if (new_cardinality > DEFAULT_MAX_SIZE) { - bitset_container_t *temp = bitset_container_from_array(src); - bitset_flip_range(temp->words, (uint32_t)range_start, - (uint32_t)range_end); - temp->cardinality = new_cardinality; - *dst = temp; - return true; - } - - array_container_t *arr = - array_container_create_given_capacity(new_cardinality); - *dst = (container_t *)arr; - if(new_cardinality == 0) { - arr->cardinality = new_cardinality; - return false; // we are done. - } - // copy stuff before the active area - memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); - - // work on the range - int32_t out_pos = start_index, in_pos = start_index; - int32_t val_in_range = range_start; - for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { - if ((uint16_t)val_in_range != src->array[in_pos]) { - arr->array[out_pos++] = (uint16_t)val_in_range; - } else { - ++in_pos; - } - } - for (; val_in_range < range_end; ++val_in_range) - arr->array[out_pos++] = (uint16_t)val_in_range; - - // content after the active range - memcpy(arr->array + out_pos, src->array + (last_index + 1), - (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); - arr->cardinality = new_cardinality; - return false; -} - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. - */ - -static bool array_container_negation_range_inplace( - array_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - bool ans = array_container_negation_range(src, range_start, range_end, dst); - // TODO : try a real inplace version - array_container_free(src); - return ans; -} - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -static bool bitset_container_negation_range( - const bitset_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - // TODO maybe consider density-based estimate - // and sometimes build result directly as array, with - // conversion back to bitset if wrong. Or determine - // actual result cardinality, then go directly for the known final cont. - - // keep computation using bitsets as long as possible. 
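The cardinality bookkeeping in array_container_negation_range() above is worth spelling out: flipping [range_start, range_end) removes the values currently inside the range and adds every other position in it. Illustrative arithmetic only, with made-up numbers:

#include <stdio.h>

int main(void) {
    int cardinality      = 10;               /* values in the container           */
    int range_start      = 100, range_end = 200;
    int current_in_range = 4;                /* of the 10, four fall in the range */

    int span     = range_end - range_start;  /* 100 positions flipped             */
    int new_in   = span - current_in_range;  /* 96 values appear                  */
    int change   = new_in - current_in_range;/* net +92                           */
    int new_card = cardinality + change;     /* decides array vs bitset result    */

    printf("new cardinality = %d\n", new_card);   /* prints 102 */
    return 0;
}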
- bitset_container_t *t = bitset_container_clone(src); - bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); - t->cardinality = bitset_container_compute_cardinality(t); - - if (t->cardinality > DEFAULT_MAX_SIZE) { - *dst = t; - return true; - } else { - *dst = array_container_from_bitset(t); - bitset_container_free(t); - return false; - } -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -static bool bitset_container_negation_range_inplace( - bitset_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); - src->cardinality = bitset_container_compute_cardinality(src); - if (src->cardinality > DEFAULT_MAX_SIZE) { - *dst = src; - return true; - } - *dst = array_container_from_bitset(src); - bitset_container_free(src); - return false; -} - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -static int run_container_negation_range( - const run_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - uint8_t return_typecode; - - // follows the Java implementation - if (range_end <= range_start) { - *dst = run_container_clone(src); - return RUN_CONTAINER_TYPE; - } - - run_container_t *ans = run_container_create_given_capacity( - src->n_runs + 1); // src->n_runs + 1); - int k = 0; - for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { - ans->runs[k] = src->runs[k]; - ans->n_runs++; - } - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < src->n_runs; ++k) { - run_container_smart_append_exclusive(ans, src->runs[k].value, - src->runs[k].length); - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); - - return return_typecode; -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -static int run_container_negation_range_inplace( - run_container_t *src, - const int range_start, const int range_end, - container_t **dst -){ - uint8_t return_typecode; - - if (range_end <= range_start) { - *dst = src; - return RUN_CONTAINER_TYPE; - } - - // TODO: efficient special case when range is 0 to 65535 inclusive - - if (src->capacity == src->n_runs) { - // no excess room. 
More checking to see if result can fit - bool last_val_before_range = false; - bool first_val_in_range = false; - bool last_val_in_range = false; - bool first_val_past_range = false; - - if (range_start > 0) - last_val_before_range = - run_container_contains(src, (uint16_t)(range_start - 1)); - first_val_in_range = run_container_contains(src, (uint16_t)range_start); - - if (last_val_before_range == first_val_in_range) { - last_val_in_range = - run_container_contains(src, (uint16_t)(range_end - 1)); - if (range_end != 0x10000) - first_val_past_range = - run_container_contains(src, (uint16_t)range_end); - - if (last_val_in_range == - first_val_past_range) { // no space for inplace - int ans = run_container_negation_range(src, range_start, - range_end, dst); - run_container_free(src); - return ans; - } - } - } - // all other cases: result will fit - - run_container_t *ans = src; - int my_nbr_runs = src->n_runs; - - ans->n_runs = 0; - int k = 0; - for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { - // ans->runs[k] = src->runs[k]; (would be self-copy) - ans->n_runs++; - } - - // as with Java implementation, use locals to give self a buffer of depth 1 - rle16_t buffered = MAKE_RLE16(0, 0); - rle16_t next = buffered; - if (k < my_nbr_runs) buffered = src->runs[k]; - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < my_nbr_runs; ++k) { - if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; - - run_container_smart_append_exclusive(ans, buffered.value, - buffered.length); - buffered = next; - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); - - return return_typecode; -} - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/mixed_negation.c */ /* begin file src/containers/mixed_equal.c */ #ifdef __cplusplus extern "C" { namespace roaring { namespace internal { #endif -static bool array_container_equal_bitset(const array_container_t* container1, +bool array_container_equal_bitset(const array_container_t* container1, const bitset_container_t* container2) { if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { if (container2->cardinality != container1->cardinality) { @@ -16316,7 +12581,7 @@ static bool array_container_equal_bitset(const array_container_t* container1, } } int32_t pos = 0; - int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { uint64_t w = container2->words[i]; while (w != 0) { uint64_t t = w & (~w + 1); @@ -16334,12 +12599,12 @@ static bool array_container_equal_bitset(const array_container_t* container1, return (pos == container1->cardinality); } -static bool run_container_equals_array(const run_container_t* container1, +bool run_container_equals_array(const run_container_t* container1, const array_container_t* container2) { if (run_container_cardinality(container1) != container2->cardinality) return false; int32_t pos = 0; - int i = 0; for (i = 0; i < container1->n_runs; ++i) { + for (int i = 0; i < container1->n_runs; ++i) { const uint32_t run_start = container1->runs[i].value; const uint32_t le = container1->runs[i].length; @@ -16356,7 +12621,7 @@ static bool run_container_equals_array(const run_container_t* container1, return true; } -static bool run_container_equals_bitset(const run_container_t* container1, +bool run_container_equals_bitset(const 
run_container_t* container1, const bitset_container_t* container2) { int run_card = run_container_cardinality(container1); @@ -16367,7 +12632,7 @@ static bool run_container_equals_bitset(const run_container_t* container1, return false; } - int32_t i; for (i = 0; i < container1->n_runs; i++) { + for (int32_t i = 0; i < container1->n_runs; i++) { uint32_t begin = container1->runs[i].value; if (container1->runs[i].length) { uint32_t end = begin + container1->runs[i].length + 1; @@ -16388,896 +12653,6 @@ static bool run_container_equals_bitset(const run_container_t* container1, } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/mixed_equal.c */ -/* begin file src/containers/bitset.c */ -/* - * bitset.c - * - */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - - -#ifdef __cplusplus -extern "C" { namespace roaring { namespace internal { -#endif - -extern inline int bitset_container_cardinality(const bitset_container_t *bitset); -extern inline bool bitset_container_nonzero_cardinality(bitset_container_t *bitset); -extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos); -extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos); -extern inline int32_t bitset_container_serialized_size_in_bytes(void); -extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos); - -static void bitset_container_clear(bitset_container_t *bitset) { - memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = 0; -} - -static void bitset_container_set_all(bitset_container_t *bitset) { - memset(bitset->words, INT64_C(-1), - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = (1 << 16); -} - - - -/* Create a new bitset. Return NULL in case of failure. */ -static bitset_container_t *bitset_container_create(void) { - bitset_container_t *bitset = - (bitset_container_t *)ndpi_malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->words = (uint64_t *)roaring_bitmap_aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->words) { - ndpi_free(bitset); - return NULL; - } - bitset_container_clear(bitset); - return bitset; -} - -/* Copy one container into another. We assume that they are distinct. 
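The equality checks above walk the bitset words with the usual set-bit iteration idiom. A minimal standalone sketch of that idiom (illustrative only; __builtin_ctzll is the GCC/Clang builtin the amalgamation itself relies on):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t w = (1ULL << 3) | (1ULL << 17) | (1ULL << 42);
    uint32_t base = 0;               /* container offset: 64 values per word */
    while (w != 0) {
        uint64_t t = w & (~w + 1);   /* isolate the lowest set bit           */
        int r = __builtin_ctzll(w);  /* its position within the word         */
        printf("%u\n", base + r);    /* prints 3, 17, 42                     */
        w ^= t;                      /* clear it and continue                */
    }
    return 0;
}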
*/ -static void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest) { - dest->cardinality = source->cardinality; - memcpy(dest->words, source->words, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} - -static void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return; // refuse to crash - if ((64 % step) == 0) { // step divides 64 - uint64_t mask = 0; // construct the repeated mask - uint32_t value; for (value = (min % step); value < 64; value += step) { - mask |= ((uint64_t)1 << value); - } - uint32_t firstword = min / 64; - uint32_t endword = (max - 1) / 64; - bitset->cardinality = (max - min + step - 1) / step; - if (firstword == endword) { - bitset->words[firstword] |= - mask & (((~UINT64_C(0)) << (min % 64)) & - ((~UINT64_C(0)) >> ((~max + 1) % 64))); - return; - } - bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); - uint32_t i; for (i = firstword + 1; i < endword; i++) - bitset->words[i] = mask; - bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); - } else { - uint32_t value; for (value = min; value < max; value += step) { - bitset_container_add(bitset, value); - } - } -} - -/* Free memory. */ -static void bitset_container_free(bitset_container_t *bitset) { - if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise - roaring_bitmap_aligned_free(bitset->words); - bitset->words = NULL; // pedantic - } - ndpi_free(bitset); -} - -/* duplicate container. */ -static bitset_container_t *bitset_container_clone(const bitset_container_t *src) { - bitset_container_t *bitset = - (bitset_container_t *)ndpi_malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->words = (uint64_t *)roaring_bitmap_aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->words) { - ndpi_free(bitset); - return NULL; - } - bitset->cardinality = src->cardinality; - memcpy(bitset->words, src->words, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - return bitset; -} - -static void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end) { - bitset_set_range(bitset->words, begin, end); - bitset->cardinality = - bitset_container_compute_cardinality(bitset); // could be smarter -} - - -static bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2) { - // could vectorize, but this is probably already quite fast in practice - const uint64_t * __restrict__ words_1 = src_1->words; - const uint64_t * __restrict__ words_2 = src_2->words; - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((words_1[i] & words_2[i]) != 0) return true; - } - return false; -} - - -#ifdef CROARING_IS_X64 -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif -/* Get the number of bits set (force computation) */ -static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) { - const uint64_t *words = bitset->words; - int32_t sum = 0; - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { - sum += hamming(words[i]); - sum += hamming(words[i + 1]); - sum += hamming(words[i + 2]); - sum += hamming(words[i + 3]); - } - return sum; -} -/* Get the number of bits set (force computation) */ -static int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - if( croaring_avx2() ) { - 
return (int) avx2_harley_seal_popcount256( - (const __m256i *)bitset->words, - BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); - } else { - return _scalar_bitset_container_compute_cardinality(bitset); - - } -} - -#elif defined(USENEON) -static int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - uint16x8_t n0 = vdupq_n_u16(0); - uint16x8_t n1 = vdupq_n_u16(0); - uint16x8_t n2 = vdupq_n_u16(0); - uint16x8_t n3 = vdupq_n_u16(0); - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { - uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]); - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); - uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]); - n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); - uint64x2_t c2 = vld1q_u64(&bitset->words[i + 4]); - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); - uint64x2_t c3 = vld1q_u64(&bitset->words[i + 6]); - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); - } - uint64x2_t n = vdupq_n_u64(0); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); - return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); -} - -#else // CROARING_IS_X64 - -/* Get the number of bits set (force computation) */ -static int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - const uint64_t *words = bitset->words; - int32_t sum = 0; - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { - sum += hamming(words[i]); - sum += hamming(words[i + 1]); - sum += hamming(words[i + 2]); - sum += hamming(words[i + 3]); - } - return sum; -} - -#endif // CROARING_IS_X64 - -#ifdef CROARING_IS_X64 - -#define BITSET_CONTAINER_FN_REPEAT 8 -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif // WORDS_IN_AVX2_REG -#define LOOP_SIZE \ - BITSET_CONTAINER_SIZE_IN_WORDS / \ - ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) - -/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the - result to bitsetout */ -// clang-format off -#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \ - neon_intrinsic, after) \ - static inline int _avx2_bitset_container_##opname##_nocard( \ - const bitset_container_t *src_1, const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \ - const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \ - /* not using the blocking optimization for some reason*/ \ - uint8_t *out = (uint8_t *)dst->words; \ - const int innerloop = 8; \ - size_t i; for (i = 0; \ - i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ - i += innerloop) { \ - __m256i A1, A2, AO; \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)out, AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 32), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 64), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 
96)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 96), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 128), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 160), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 192), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out + 224), AO); \ - out += 256; \ - words_1 += 256; \ - words_2 += 256; \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ - } - -#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \ - neon_intrinsic, after) \ - /* next, a version that updates cardinality*/ \ - static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \ - const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \ - __m256i *out = (__m256i *)dst->words; \ - dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \ - words_2, words_1, out, \ - BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ - return dst->cardinality; \ - } \ - -#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \ - neon_intrinsic, after) \ - /* next, a version that just computes the cardinality*/ \ - static inline int _avx2_bitset_container_##opname##_justcard( \ - const bitset_container_t *src_1, const bitset_container_t *src_2) { \ - const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \ - const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \ - return (int)avx2_harley_seal_popcount256_##opname( \ - data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ - } - - -// we duplicate the function because other containers use the "or" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, 
vbicq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -// we duplicate the function because other containers use the "or" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -// we duplicate the function because other containers use the "or" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION -CROARING_TARGET_AVX2 -AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION) -CROARING_UNTARGET_REGION - - -#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \ - neon_intrinsic) \ - static inline int _scalar_bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t *__restrict__ words_1 = src_1->words; \ - const uint64_t *__restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - int32_t sum = 0; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ - word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ - out[i] = word_1; \ - out[i + 1] = word_2; \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - dst->cardinality = sum; \ - return dst->cardinality; \ - } \ - static inline int _scalar_bitset_container_##opname##_nocard( \ - const bitset_container_t *src_1, const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t *__restrict__ words_1 = src_1->words; \ - const uint64_t *__restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - size_t i; for (i = 0; i < 
BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ - out[i] = (words_1[i])opsymbol(words_2[i]); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ - } \ - static inline int _scalar_bitset_container_##opname##_justcard( \ - const bitset_container_t *src_1, const bitset_container_t *src_2) { \ - const uint64_t *__restrict__ words_1 = src_1->words; \ - const uint64_t *__restrict__ words_2 = src_2->words; \ - int32_t sum = 0; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ - word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - return sum; \ - } - -// we duplicate the function because other containers use the "or" term, makes API more consistent -SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) -SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) -SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) - -SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) -SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) - - -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ - static int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - if ( croaring_avx2() ) { \ - return _avx2_bitset_container_##opname(src_1, src_2, dst); \ - } else { \ - return _scalar_bitset_container_##opname(src_1, src_2, dst); \ - } \ - } \ - static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - if ( croaring_avx2() ) { \ - return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \ - } else { \ - return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \ - } \ - } \ - static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - if ((croaring_detect_supported_architectures() & CROARING_AVX2) == \ - CROARING_AVX2) { \ - return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \ - } else { \ - return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \ - } \ - } - - - -#elif defined(USENEON) - -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ -static int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - uint16x8_t n0 = vdupq_n_u16(0); \ - uint16x8_t n1 = vdupq_n_u16(0); \ - uint16x8_t n2 = vdupq_n_u16(0); \ - uint16x8_t n3 = vdupq_n_u16(0); \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ - vld1q_u64(&words_2[i + 0])); \ - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ - vst1q_u64(&out[i + 0], c0); \ - uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ - vld1q_u64(&words_2[i + 2])); \ - n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ - vst1q_u64(&out[i + 2], c1); \ - uint64x2_t c2 = 
neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ - vld1q_u64(&words_2[i + 4])); \ - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ - vst1q_u64(&out[i + 4], c2); \ - uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ - vld1q_u64(&words_2[i + 6])); \ - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ - vst1q_u64(&out[i + 6], c3); \ - } \ - uint64x2_t n = vdupq_n_u64(0); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ - dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ - return dst->cardinality; \ -} \ -static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ - vld1q_u64(&words_2[i + 0]))); \ - vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ - vld1q_u64(&words_2[i + 2]))); \ - vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ - vld1q_u64(&words_2[i + 4]))); \ - vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ - vld1q_u64(&words_2[i + 6]))); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - uint16x8_t n0 = vdupq_n_u16(0); \ - uint16x8_t n1 = vdupq_n_u16(0); \ - uint16x8_t n2 = vdupq_n_u16(0); \ - uint16x8_t n3 = vdupq_n_u16(0); \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ - vld1q_u64(&words_2[i + 0])); \ - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ - uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ - vld1q_u64(&words_2[i + 2])); \ - n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ - uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ - vld1q_u64(&words_2[i + 4])); \ - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ - uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ - vld1q_u64(&words_2[i + 6])); \ - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ - } \ - uint64x2_t n = vdupq_n_u64(0); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ - return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ -} - -#else - -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ -static int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - int32_t sum = 0; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = 
(words_1[i])opsymbol(words_2[i]), \ - word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ - out[i] = word_1; \ - out[i + 1] = word_2; \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - dst->cardinality = sum; \ - return dst->cardinality; \ -} \ -static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - uint64_t *out = dst->words; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ - out[i] = (words_1[i])opsymbol(words_2[i]); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ - const uint64_t * __restrict__ words_2 = src_2->words; \ - int32_t sum = 0; \ - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ - word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - return sum; \ -} - -#endif // CROARING_IS_X64 - -// we duplicate the function because other containers use the "or" term, makes API more consistent -BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) -BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) -BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) - -BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) -BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) -// clang-format On - - -static int bitset_container_to_uint32_array( - uint32_t *out, - const bitset_container_t *bc, - uint32_t base -){ -#ifdef CROARING_IS_X64 - if(( croaring_avx2() ) && (bc->cardinality >= 8192)) // heuristic - return (int) bitset_extract_setbits_avx2(bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base); - else - return (int) bitset_extract_setbits(bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS, out, base); -#else - return (int) bitset_extract_setbits(bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS, out, base); -#endif -} - -/* - * Print this container using printf (useful for debugging). - */ -static void bitset_container_printf(const bitset_container_t * v) { - printf("{"); - uint32_t base = 0; - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u",base + r); - iamfirst = false; - } else { - printf(",%u",base + r); - } - w ^= t; - } - base += 64; - } - printf("}"); -} - - -/* - * Print this container using printf as a comma-separated list of 32-bit integers starting at base. 
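All of the cardinality paths above (AVX2 Harley-Seal, NEON, and the unrolled scalar loop) reduce to summing per-word popcounts over the 1024 words of a bitset container. A minimal scalar sketch, for illustration only:

#include <stdint.h>
#include <stdio.h>

#define WORDS 1024   /* 1024 * 64 = 65536 bits per bitset container */

static int compute_cardinality(const uint64_t *words) {
    int sum = 0;
    for (int i = 0; i < WORDS; i++)
        sum += __builtin_popcountll(words[i]);   /* GCC/Clang builtin */
    return sum;
}

int main(void) {
    static uint64_t words[WORDS];
    words[0]   = 0xFF;   /* 8 bits set */
    words[512] = 1;      /* 1 bit set  */
    printf("%d\n", compute_cardinality(words));  /* prints 9 */
    return 0;
}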
- */ -static void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u", r + base); - iamfirst = false; - } else { - printf(",%u",r + base); - } - w ^= t; - } - base += 64; - } -} - - -// TODO: use the fast lower bound, also -static int bitset_container_number_of_runs(bitset_container_t *bc) { - int num_runs = 0; - uint64_t next_word = bc->words[0]; - - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { - uint64_t word = next_word; - next_word = bc->words[i+1]; - num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); - } - - uint64_t word = next_word; - num_runs += hamming((~word) & (word << 1)); - if((word & 0x8000000000000000ULL) != 0) - num_runs++; - return num_runs; -} - - -static int32_t bitset_container_write(const bitset_container_t *container, - char *buf) { - memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - - -static int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, - const char *buf) { - container->cardinality = cardinality; - memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - -static bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { - int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(r + base, ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - -static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { - int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - -#ifdef CROARING_IS_X64 -CROARING_TARGET_AVX2 -static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { - const __m256i *ptr1 = (const __m256i*)container1->words; - const __m256i *ptr2 = (const __m256i*)container2->words; - size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { - __m256i r1 = _mm256_load_si256(ptr1+i); - __m256i r2 = _mm256_load_si256(ptr2+i); - int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); - if ((uint32_t)mask != UINT32_MAX) { - return false; - } - } - return true; -} -CROARING_UNTARGET_REGION -#endif // CROARING_IS_X64 - -static bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality != container2->cardinality) { - return false; - } - if (container1->cardinality == INT32_C(0x10000)) { - return true; - } - } -#ifdef CROARING_IS_X64 - if( croaring_avx2() ) { - 
return _avx2_bitset_container_equals(container1, container2); - } -#endif - return memcmp(container1->words, - container2->words, - BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; -} - -static bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality > container2->cardinality) { - return false; - } - } - int32_t i ; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if((container1->words[i] & container2->words[i]) != container1->words[i]) { - return false; - } - } - return true; -} - -static bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { - int card = bitset_container_cardinality(container); - if(rank >= *start_rank + card) { - *start_rank += card; - return false; - } - const uint64_t *words = container->words; - int32_t size; - int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { - size = hamming(words[i]); - if(rank <= *start_rank + size) { - uint64_t w = container->words[i]; - uint16_t base = i*64; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(*start_rank == rank) { - *element = r+base; - return true; - } - w ^= t; - *start_rank += 1; - } - } - else - *start_rank += size; - } - assert(false); - __builtin_unreachable(); -} - - -/* Returns the smallest value (assumes not empty) */ -static uint16_t bitset_container_minimum(const bitset_container_t *container) { - int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = container->words[i]; - if (w != 0) { - int r = __builtin_ctzll(w); - return r + i * 64; - } - } - return UINT16_MAX; -} - -/* Returns the largest value (assumes not empty) */ -static uint16_t bitset_container_maximum(const bitset_container_t *container) { - int32_t i; for (i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { - uint64_t w = container->words[i]; - if (w != 0) { - int r = __builtin_clzll(w); - return i * 64 + 63 - r; - } - } - return 0; -} - -/* Returns the number of values equal or smaller than x */ -static int bitset_container_rank(const bitset_container_t *container, uint16_t x) { - // credit: aqrit - int sum = 0; - int i = 0, end; - for (end = x / 64; i < end; i++){ - sum += hamming(container->words[i]); - } - uint64_t lastword = container->words[i]; - uint64_t lastpos = UINT64_C(1) << (x % 64); - uint64_t mask = lastpos + lastpos - 1; // smear right - sum += hamming(lastword & mask); - return sum; -} - -/* Returns the index of the first value equal or larger than x, or -1 */ -static int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - uint32_t k = x32 / 64; - uint64_t word = container->words[k]; - const int diff = x32 - k * 64; // in [0,64) - word = (word >> diff) << diff; // a mask is faster, but we don't care - while(word == 0) { - k++; - if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; - word = container->words[k]; - } - return k * 64 + __builtin_ctzll(word); -} - -#ifdef __cplusplus -} } } // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/bitset.c */ /* begin file src/containers/mixed_intersection.c */ /* * mixed_intersection.c @@ -17291,7 +12666,7 @@ extern "C" { namespace roaring { namespace internal { /* Compute the intersection of src_1 and src_2 and write the result to * dst. 
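bitset_container_rank() above popcounts the whole words before x and then masks the final word up to and including bit x. A standalone sketch of the same computation (illustrative only):

#include <stdint.h>
#include <stdio.h>

static int bitset_rank(const uint64_t *words, uint16_t x) {
    int sum = 0, end = x / 64;
    for (int i = 0; i < end; i++)
        sum += __builtin_popcountll(words[i]);
    uint64_t lastpos = UINT64_C(1) << (x % 64);
    uint64_t mask = lastpos + lastpos - 1;       /* bits 0..(x % 64) set */
    return sum + __builtin_popcountll(words[end] & mask);
}

int main(void) {
    static uint64_t words[1024];
    words[0] = (1ULL << 2) | (1ULL << 5) | (1ULL << 9);
    printf("%d\n", bitset_rank(words, 5));   /* 2: values 2 and 5 are <= 5 */
    return 0;
}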
*/ -static void array_bitset_container_intersection(const array_container_t *src_1, +void array_bitset_container_intersection(const array_container_t *src_1, const bitset_container_t *src_2, array_container_t *dst) { if (dst->capacity < src_1->cardinality) { @@ -17299,7 +12674,7 @@ static void array_bitset_container_intersection(const array_container_t *src_1, } int32_t newcard = 0; // dst could be src_1 const int32_t origcard = src_1->cardinality; - int i = 0; for (i = 0; i < origcard; ++i) { + for (int i = 0; i < origcard; ++i) { uint16_t key = src_1->array[i]; // this branchless approach is much faster... dst->array[newcard] = key; @@ -17321,11 +12696,11 @@ static void array_bitset_container_intersection(const array_container_t *src_1, } /* Compute the size of the intersection of src_1 and src_2. */ -static int array_bitset_container_intersection_cardinality( +int array_bitset_container_intersection_cardinality( const array_container_t *src_1, const bitset_container_t *src_2) { int32_t newcard = 0; const int32_t origcard = src_1->cardinality; - int i = 0; for (i = 0; i < origcard; ++i) { + for (int i = 0; i < origcard; ++i) { uint16_t key = src_1->array[i]; newcard += bitset_container_contains(src_2, key); } @@ -17333,10 +12708,10 @@ static int array_bitset_container_intersection_cardinality( } -static bool array_bitset_container_intersect(const array_container_t *src_1, +bool array_bitset_container_intersect(const array_container_t *src_1, const bitset_container_t *src_2) { const int32_t origcard = src_1->cardinality; - int i = 0; for (i = 0; i < origcard; ++i) { + for (int i = 0; i < origcard; ++i) { uint16_t key = src_1->array[i]; if(bitset_container_contains(src_2, key)) return true; } @@ -17346,7 +12721,7 @@ static bool array_bitset_container_intersect(const array_container_t *src_1, /* Compute the intersection of src_1 and src_2 and write the result to * dst. It is allowed for dst to be equal to src_1. We assume that dst is a * valid container. */ -static void array_run_container_intersection(const array_container_t *src_1, +void array_run_container_intersection(const array_container_t *src_1, const run_container_t *src_2, array_container_t *dst) { if (run_container_is_full(src_2)) { @@ -17390,7 +12765,7 @@ static void array_run_container_intersection(const array_container_t *src_1, * *dst. If the result is true then the result is a bitset_container_t * otherwise is a array_container_t. 
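These intersection kernels back the public and/intersect entry points. A hedged usage sketch against the public CRoaring API (include path is an assumption; the range values are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include "roaring.h"   /* amalgamated header; adjust the path to your build */

int main(void) {
    roaring_bitmap_t *a = roaring_bitmap_from_range(0, 1000, 1);
    roaring_bitmap_t *b = roaring_bitmap_from_range(500, 1500, 1);

    printf("%d\n", (int)roaring_bitmap_intersect(a, b));           /* 1   */
    printf("%llu\n", (unsigned long long)
                     roaring_bitmap_and_cardinality(a, b));        /* 500 */

    roaring_bitmap_t *inter = roaring_bitmap_and(a, b);            /* [500,1000) */
    printf("%llu\n", (unsigned long long)
                     roaring_bitmap_get_cardinality(inter));       /* 500 */

    roaring_bitmap_free(inter);
    roaring_bitmap_free(b);
    roaring_bitmap_free(a);
    return 0;
}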
If *dst == src_2, an in-place processing * is attempted.*/ -static bool run_bitset_container_intersection( +bool run_bitset_container_intersection( const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -17410,10 +12785,10 @@ static bool run_bitset_container_intersection( if (*dst == NULL) { return false; } - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; - uint32_t endofrun = (uint32_t)rle.value + rle.length, runValue; - for (runValue = rle.value; runValue <= endofrun; + uint32_t endofrun = (uint32_t)rle.value + rle.length; + for (uint32_t runValue = rle.value; runValue <= endofrun; ++runValue) { answer->array[answer->cardinality] = (uint16_t)runValue; answer->cardinality += @@ -17425,7 +12800,7 @@ static bool run_bitset_container_intersection( if (*dst == src_2) { // we attempt in-place bitset_container_t *answer = CAST_bitset(*dst); uint32_t start = 0; - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { const rle16_t rle = src_1->runs[rlepos]; uint32_t end = rle.value; bitset_reset_range(src_2->words, start, end); @@ -17454,7 +12829,7 @@ static bool run_bitset_container_intersection( return true; } uint32_t start = 0; - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { const rle16_t rle = src_1->runs[rlepos]; uint32_t end = rle.value; bitset_reset_range(answer->words, start, end); @@ -17479,7 +12854,7 @@ static bool run_bitset_container_intersection( } /* Compute the size of the intersection between src_1 and src_2 . */ -static int array_run_container_intersection_cardinality(const array_container_t *src_1, +int array_run_container_intersection_cardinality(const array_container_t *src_1, const run_container_t *src_2) { if (run_container_is_full(src_2)) { return src_1->cardinality; @@ -17514,13 +12889,13 @@ static int array_run_container_intersection_cardinality(const array_container_t /* Compute the intersection between src_1 and src_2 **/ -static int run_bitset_container_intersection_cardinality( +int run_bitset_container_intersection_cardinality( const run_container_t *src_1, const bitset_container_t *src_2) { if (run_container_is_full(src_1)) { return bitset_container_cardinality(src_2); } int answer = 0; - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; answer += bitset_lenrange_cardinality(src_2->words, rle.value, rle.length); @@ -17529,7 +12904,7 @@ static int run_bitset_container_intersection_cardinality( } -static bool array_run_container_intersect(const array_container_t *src_1, +bool array_run_container_intersect(const array_container_t *src_1, const run_container_t *src_2) { if( run_container_is_full(src_2) ) { return !array_container_empty(src_1); @@ -17562,12 +12937,12 @@ static bool array_run_container_intersect(const array_container_t *src_1, /* Compute the intersection between src_1 and src_2 **/ -static bool run_bitset_container_intersect(const run_container_t *src_1, +bool run_bitset_container_intersect(const run_container_t *src_1, const bitset_container_t *src_2) { if( run_container_is_full(src_1) ) { return !bitset_container_empty(src_2); } - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; 
rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; if(!bitset_lenrange_empty(src_2->words, rle.value,rle.length)) return true; } @@ -17579,7 +12954,7 @@ static bool run_bitset_container_intersect(const run_container_t *src_1, * to *dst. If the return function is true, the result is a bitset_container_t * otherwise is a array_container_t. */ -static bool bitset_bitset_container_intersection( +bool bitset_bitset_container_intersection( const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -17602,7 +12977,7 @@ static bool bitset_bitset_container_intersection( return false; // not a bitset } -static bool bitset_bitset_container_intersection_inplace( +bool bitset_bitset_container_intersection_inplace( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -17627,20 +13002,377 @@ static bool bitset_bitset_container_intersection_inplace( } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/mixed_intersection.c */ +/* begin file src/containers/mixed_negation.c */ +/* + * mixed_negation.c + * + */ + +#include <assert.h> +#include <string.h> + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +// TODO: make simplified and optimized negation code across +// the full range. + +/* Negation across the entire range of the container. + * Compute the negation of src and write the result + * to *dst. The complement of a + * sufficiently sparse set will always be dense and a hence a bitmap +' * We assume that dst is pre-allocated and a valid bitset container + * There can be no in-place version. + */ +void array_container_negation(const array_container_t *src, + bitset_container_t *dst) { + uint64_t card = UINT64_C(1 << 16); + bitset_container_set_all(dst); + + if (src->cardinality == 0) { + return; + } + + dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, + (uint64_t)src->cardinality); +} + +/* Negation across the entire range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation( + const bitset_container_t *src, container_t **dst +){ + return bitset_container_negation_range(src, 0, (1 << 16), dst); +} + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_inplace( + bitset_container_t *src, container_t **dst +){ + return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); +} + +/* Negation across the entire range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
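Several of the kernels above and below share one container-selection rule: once a result's cardinality is known, anything above DEFAULT_MAX_SIZE (4096 in CRoaring) stays a bitset and anything at or below it is converted to a sorted uint16_t array. A trivial sketch of that decision, illustrative only:

#include <stdio.h>

#define DEFAULT_MAX_SIZE 4096   /* array-container capacity threshold */

typedef enum { CONTAINER_ARRAY = 0, CONTAINER_BITSET = 1 } container_kind;

static container_kind pick_container(int cardinality) {
    return cardinality > DEFAULT_MAX_SIZE ? CONTAINER_BITSET : CONTAINER_ARRAY;
}

int main(void) {
    printf("%d\n", pick_container(100));     /* 0 -> array  */
    printf("%d\n", pick_container(10000));   /* 1 -> bitset */
    return 0;
}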
+ */ +int run_container_negation(const run_container_t *src, container_t **dst) { + return run_container_negation_range(src, 0, (1 << 16), dst); +} + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, container_t **dst) { + return run_container_negation_range_inplace(src, 0, (1 << 16), dst); +} + +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. + */ +bool array_container_negation_range( + const array_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + /* close port of the Java implementation */ + if (range_start >= range_end) { + *dst = array_container_clone(src); + return false; + } + + int32_t start_index = + binarySearch(src->array, src->cardinality, (uint16_t)range_start); + if (start_index < 0) start_index = -start_index - 1; + + int32_t last_index = + binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); + if (last_index < 0) last_index = -last_index - 2; + + const int32_t current_values_in_range = last_index - start_index + 1; + const int32_t span_to_be_flipped = range_end - range_start; + const int32_t new_values_in_range = + span_to_be_flipped - current_values_in_range; + const int32_t cardinality_change = + new_values_in_range - current_values_in_range; + const int32_t new_cardinality = src->cardinality + cardinality_change; + + if (new_cardinality > DEFAULT_MAX_SIZE) { + bitset_container_t *temp = bitset_container_from_array(src); + bitset_flip_range(temp->words, (uint32_t)range_start, + (uint32_t)range_end); + temp->cardinality = new_cardinality; + *dst = temp; + return true; + } + + array_container_t *arr = + array_container_create_given_capacity(new_cardinality); + *dst = (container_t *)arr; + if(new_cardinality == 0) { + arr->cardinality = new_cardinality; + return false; // we are done. + } + // copy stuff before the active area + memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); + + // work on the range + int32_t out_pos = start_index, in_pos = start_index; + int32_t val_in_range = range_start; + for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { + if ((uint16_t)val_in_range != src->array[in_pos]) { + arr->array[out_pos++] = (uint16_t)val_in_range; + } else { + ++in_pos; + } + } + for (; val_in_range < range_end; ++val_in_range) + arr->array[out_pos++] = (uint16_t)val_in_range; + + // content after the active range + memcpy(arr->array + out_pos, src->array + (last_index + 1), + (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); + arr->cardinality = new_cardinality; + return false; +} + +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. + */ + +bool array_container_negation_range_inplace( + array_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + bool ans = array_container_negation_range(src, range_start, range_end, dst); + // TODO : try a real inplace version + array_container_free(src); + return ans; +} + +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. 
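/*
 * Editor's note (not part of the upstream diff): array_container_negation_range
 * above sizes its output purely by counting.  If `card` values are present and
 * `in_range` of them fall inside [range_start, range_end), flipping that span of
 * span = range_end - range_start slots yields card + (span - 2 * in_range)
 * values.  A minimal self-contained sketch of that bookkeeping:
 */
#include <assert.h>
#include <stdint.h>

static int32_t flipped_cardinality(int32_t card, int32_t span, int32_t in_range) {
    int32_t new_in_range = span - in_range;   /* values gained inside the range */
    return card + (new_in_range - in_range);  /* == card + span - 2*in_range    */
}

static void flipped_cardinality_example(void) {
    /* 10 values, 4 of them inside a 100-wide range: 6 survive outside the range,
     * 96 new ones appear inside it, so the result holds 102 values. */
    assert(flipped_cardinality(10, 100, 4) == 102);
}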
A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range( + const bitset_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + // TODO maybe consider density-based estimate + // and sometimes build result directly as array, with + // conversion back to bitset if wrong. Or determine + // actual result cardinality, then go directly for the known final cont. + + // keep computation using bitsets as long as possible. + bitset_container_t *t = bitset_container_clone(src); + bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); + t->cardinality = bitset_container_compute_cardinality(t); + + if (t->cardinality > DEFAULT_MAX_SIZE) { + *dst = t; + return true; + } else { + *dst = array_container_from_bitset(t); + bitset_container_free(t); + return false; + } +} + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_range_inplace( + bitset_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); + src->cardinality = bitset_container_compute_cardinality(src); + if (src->cardinality > DEFAULT_MAX_SIZE) { + *dst = src; + return true; + } + *dst = array_container_from_bitset(src); + bitset_container_free(src); + return false; +} + +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +int run_container_negation_range( + const run_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + uint8_t return_typecode; + + // follows the Java implementation + if (range_end <= range_start) { + *dst = run_container_clone(src); + return RUN_CONTAINER_TYPE; + } + + run_container_t *ans = run_container_create_given_capacity( + src->n_runs + 1); // src->n_runs + 1); + int k = 0; + for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { + ans->runs[k] = src->runs[k]; + ans->n_runs++; + } + + run_container_smart_append_exclusive( + ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); + + for (; k < src->n_runs; ++k) { + run_container_smart_append_exclusive(ans, src->runs[k].value, + src->runs[k].length); + } + + *dst = convert_run_to_efficient_container(ans, &return_typecode); + if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); + + return return_typecode; +} + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. 
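/*
 * Editor's note (not part of the upstream diff): the *_negation_range kernels in
 * this file back the public range-flip calls.  A minimal usage sketch, assuming
 * the amalgamated public API shipped in third_party/include/roaring.h (the range
 * end is exclusive):
 */
#include <assert.h>
#include "roaring.h"   /* adjust the include path to the bundled header */

static void flip_range_example(void) {
    roaring_bitmap_t *r = roaring_bitmap_create();
    roaring_bitmap_add(r, 5);
    roaring_bitmap_flip_inplace(r, 0, 10);       /* complement [0, 10)   */
    assert(!roaring_bitmap_contains(r, 5));      /* 5 was flipped out    */
    assert(roaring_bitmap_contains(r, 9));       /* 9 was flipped in     */
    assert(roaring_bitmap_get_cardinality(r) == 9);
    roaring_bitmap_free(r);
}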
+ */ +int run_container_negation_range_inplace( + run_container_t *src, + const int range_start, const int range_end, + container_t **dst +){ + uint8_t return_typecode; + + if (range_end <= range_start) { + *dst = src; + return RUN_CONTAINER_TYPE; + } + + // TODO: efficient special case when range is 0 to 65535 inclusive + + if (src->capacity == src->n_runs) { + // no excess room. More checking to see if result can fit + bool last_val_before_range = false; + bool first_val_in_range = false; + bool last_val_in_range = false; + bool first_val_past_range = false; + + if (range_start > 0) + last_val_before_range = + run_container_contains(src, (uint16_t)(range_start - 1)); + first_val_in_range = run_container_contains(src, (uint16_t)range_start); + + if (last_val_before_range == first_val_in_range) { + last_val_in_range = + run_container_contains(src, (uint16_t)(range_end - 1)); + if (range_end != 0x10000) + first_val_past_range = + run_container_contains(src, (uint16_t)range_end); + + if (last_val_in_range == + first_val_past_range) { // no space for inplace + int ans = run_container_negation_range(src, range_start, + range_end, dst); + run_container_free(src); + return ans; + } + } + } + // all other cases: result will fit + + run_container_t *ans = src; + int my_nbr_runs = src->n_runs; + + ans->n_runs = 0; + int k = 0; + for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { + // ans->runs[k] = src->runs[k]; (would be self-copy) + ans->n_runs++; + } + + // as with Java implementation, use locals to give self a buffer of depth 1 + rle16_t buffered = MAKE_RLE16(0, 0); + rle16_t next = buffered; + if (k < my_nbr_runs) buffered = src->runs[k]; + + run_container_smart_append_exclusive( + ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); + + for (; k < my_nbr_runs; ++k) { + if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; + + run_container_smart_append_exclusive(ans, buffered.value, + buffered.length); + buffered = next; + } + + *dst = convert_run_to_efficient_container(ans, &return_typecode); + if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); + + return return_typecode; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_negation.c */ /* begin file src/containers/mixed_subset.c */ #ifdef __cplusplus extern "C" { namespace roaring { namespace internal { #endif -static bool array_container_is_subset_bitset(const array_container_t* container1, +bool array_container_is_subset_bitset(const array_container_t* container1, const bitset_container_t* container2) { if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { if (container2->cardinality < container1->cardinality) { return false; } } - int i = 0; for (i = 0; i < container1->cardinality; ++i) { + for (int i = 0; i < container1->cardinality; ++i) { if (!bitset_container_contains(container2, container1->array[i])) { return false; } @@ -17648,19 +13380,19 @@ static bool array_container_is_subset_bitset(const array_container_t* container1 return true; } -static bool run_container_is_subset_array(const run_container_t* container1, +bool run_container_is_subset_array(const run_container_t* container1, const array_container_t* container2) { if (run_container_cardinality(container1) > container2->cardinality) return false; int32_t start_pos = -1, stop_pos = -1; - int i = 0; for (i = 0; i < container1->n_runs; ++i) { + for (int i = 0; i < container1->n_runs; ++i) { int32_t start = container1->runs[i].value; int32_t 
stop = start + container1->runs[i].length; start_pos = advanceUntil(container2->array, stop_pos, container2->cardinality, start); stop_pos = advanceUntil(container2->array, stop_pos, container2->cardinality, stop); - if (start_pos == container2->cardinality) { + if (stop_pos == container2->cardinality) { return false; } else if (stop_pos - start_pos != stop - start || container2->array[start_pos] != start || @@ -17671,7 +13403,7 @@ static bool run_container_is_subset_array(const run_container_t* container1, return true; } -static bool array_container_is_subset_run(const array_container_t* container1, +bool array_container_is_subset_run(const array_container_t* container1, const run_container_t* container2) { if (container1->cardinality > run_container_cardinality(container2)) return false; @@ -17694,7 +13426,7 @@ static bool array_container_is_subset_run(const array_container_t* container1, } } -static bool run_container_is_subset_bitset(const run_container_t* container1, +bool run_container_is_subset_bitset(const run_container_t* container1, const bitset_container_t* container2) { // todo: this code could be much faster if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { @@ -17708,10 +13440,10 @@ static bool run_container_is_subset_bitset(const run_container_t* container1, return false; } } - int i = 0; for (i = 0; i < container1->n_runs; ++i) { + for (int i = 0; i < container1->n_runs; ++i) { uint32_t run_start = container1->runs[i].value; uint32_t le = container1->runs[i].length; - uint32_t j; for (j = run_start; j <= run_start + le; ++j) { + for (uint32_t j = run_start; j <= run_start + le; ++j) { if (!bitset_container_contains(container2, j)) { return false; } @@ -17720,7 +13452,7 @@ static bool run_container_is_subset_bitset(const run_container_t* container1, return true; } -static bool bitset_container_is_subset_run(const bitset_container_t* container1, +bool bitset_container_is_subset_run(const bitset_container_t* container1, const run_container_t* container2) { // todo: this code could be much faster if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { @@ -17768,6 +13500,305 @@ static bool bitset_container_is_subset_run(const bitset_container_t* container1, } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/mixed_subset.c */ +/* begin file src/containers/mixed_union.c */ +/* + * mixed_union.c + * + */ + +#include <assert.h> +#include <string.h> + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the union of src_1 and src_2 and write the result to + * dst. */ +void array_bitset_container_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + dst->cardinality = (int32_t)bitset_set_list_withcard( + dst->words, dst->cardinality, src_1->array, src_1->cardinality); +} + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
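/*
 * Editor's note (not part of the upstream diff): the lazy_* kernels defer the
 * cardinality computation (BITSET_UNKNOWN_CARDINALITY) so that long chains of
 * unions avoid repeated popcounts.  At the public-API level this surfaces as the
 * lazy OR calls, which must be followed by a repair step.  A usage sketch,
 * assuming the bundled amalgamated roaring.h:
 */
#include <assert.h>
#include "roaring.h"

static uint64_t lazy_union_example(const roaring_bitmap_t *a,
                                   const roaring_bitmap_t *b,
                                   const roaring_bitmap_t *c) {
    roaring_bitmap_t *acc = roaring_bitmap_lazy_or(a, b, true);
    roaring_bitmap_lazy_or_inplace(acc, c, true);
    roaring_bitmap_repair_after_lazy(acc);       /* recompute cardinalities */
    uint64_t n = roaring_bitmap_get_cardinality(acc);
    roaring_bitmap_free(acc);
    return n;
}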
*/ +void array_bitset_container_lazy_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + bitset_set_list(dst->words, src_1->array, src_1->cardinality); + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +} + +void run_bitset_container_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream + if (src_2 != dst) bitset_container_copy(src_2, dst); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_set_lenrange(dst->words, rle.value, rle.length); + } + dst->cardinality = bitset_container_compute_cardinality(dst); +} + +void run_bitset_container_lazy_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream + if (src_2 != dst) bitset_container_copy(src_2, dst); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_set_lenrange(dst->words, rle.value, rle.length); + } + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +} + +// why do we leave the result as a run container?? +void array_run_container_union(const array_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + if (run_container_is_full(src_2)) { + run_container_copy(src_2, dst); + return; + } + // TODO: see whether the "2*" is spurious + run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); + int32_t rlepos = 0; + int32_t arraypos = 0; + rle16_t previousrle; + if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { + previousrle = run_container_append_first(dst, src_2->runs[rlepos]); + rlepos++; + } else { + previousrle = + run_container_append_value_first(dst, src_1->array[arraypos]); + arraypos++; + } + while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { + if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { + run_container_append(dst, src_2->runs[rlepos], &previousrle); + rlepos++; + } else { + run_container_append_value(dst, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } + if (arraypos < src_1->cardinality) { + while (arraypos < src_1->cardinality) { + run_container_append_value(dst, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } else { + while (rlepos < src_2->n_runs) { + run_container_append(dst, src_2->runs[rlepos], &previousrle); + rlepos++; + } + } +} + +void array_run_container_inplace_union(const array_container_t *src_1, + run_container_t *src_2) { + if (run_container_is_full(src_2)) { + return; + } + const int32_t maxoutput = src_1->cardinality + src_2->n_runs; + const int32_t neededcapacity = maxoutput + src_2->n_runs; + if (src_2->capacity < neededcapacity) + run_container_grow(src_2, neededcapacity, true); + memmove(src_2->runs + maxoutput, src_2->runs, + src_2->n_runs * sizeof(rle16_t)); + rle16_t *inputsrc2 = src_2->runs + maxoutput; + int32_t rlepos = 0; + int32_t arraypos = 0; + int src2nruns = src_2->n_runs; + src_2->n_runs = 0; + + rle16_t previousrle; + + if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { + previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); + rlepos++; + } else { + previousrle = + run_container_append_value_first(src_2, src_1->array[arraypos]); + arraypos++; + } + + while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { + if 
(inputsrc2[rlepos].value <= src_1->array[arraypos]) { + run_container_append(src_2, inputsrc2[rlepos], &previousrle); + rlepos++; + } else { + run_container_append_value(src_2, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } + if (arraypos < src_1->cardinality) { + while (arraypos < src_1->cardinality) { + run_container_append_value(src_2, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } else { + while (rlepos < src2nruns) { + run_container_append(src_2, inputsrc2[rlepos], &previousrle); + rlepos++; + } + } +} + +bool array_array_container_union( + const array_container_t *src_1, const array_container_t *src_2, + container_t **dst +){ + int totalCardinality = src_1->cardinality + src_2->cardinality; + if (totalCardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_create_given_capacity(totalCardinality); + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + ourbitset->cardinality = (int32_t)bitset_set_list_withcard( + ourbitset->words, src_1->cardinality, src_2->array, + src_2->cardinality); + if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { + // need to convert! + *dst = array_container_from_bitset(ourbitset); + bitset_container_free(ourbitset); + returnval = false; // not going to be a bitset + } + } + return returnval; +} + +bool array_array_container_inplace_union( + array_container_t *src_1, const array_container_t *src_2, + container_t **dst +){ + int totalCardinality = src_1->cardinality + src_2->cardinality; + *dst = NULL; + if (totalCardinality <= DEFAULT_MAX_SIZE) { + if(src_1->capacity < totalCardinality) { + *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } else { + memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); + src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, + src_2->array, src_2->cardinality, src_1->array); + return false; // not a bitset + } + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + ourbitset->cardinality = (int32_t)bitset_set_list_withcard( + ourbitset->words, src_1->cardinality, src_2->array, + src_2->cardinality); + if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { + // need to convert! 
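/*
 * Editor's note (not part of the upstream diff): at this point the union has been
 * materialised in a temporary bitset; since it turned out to hold no more than
 * DEFAULT_MAX_SIZE values, the code below grows src_1 if needed, extracts the set
 * bits back into src_1's sorted uint16_t array, hands src_1 back through *dst and
 * frees the temporary bitset -- so the caller still receives an array container
 * and the in-place contract is preserved.
 */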
+ if(src_1->capacity < ourbitset->cardinality) { + array_container_grow(src_1, ourbitset->cardinality, false); + } + + bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS, + src_1->array, 0); + src_1->cardinality = ourbitset->cardinality; + *dst = src_1; + bitset_container_free(ourbitset); + returnval = false; // not going to be a bitset + } + } + return returnval; +} + + +bool array_array_container_lazy_union( + const array_container_t *src_1, const array_container_t *src_2, + container_t **dst +){ + int totalCardinality = src_1->cardinality + src_2->cardinality; + if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { + *dst = array_container_create_given_capacity(totalCardinality); + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); + ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + } + return returnval; +} + + +bool array_array_container_lazy_inplace_union( + array_container_t *src_1, const array_container_t *src_2, + container_t **dst +){ + int totalCardinality = src_1->cardinality + src_2->cardinality; + *dst = NULL; + if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { + if(src_1->capacity < totalCardinality) { + *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } else { + memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); + src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, + src_2->array, src_2->cardinality, src_1->array); + return false; // not a bitset + } + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); + ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + } + return returnval; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_union.c */ /* begin file src/containers/mixed_xor.c */ /* * mixed_xor.c @@ -17784,7 +13815,7 @@ extern "C" { namespace roaring { namespace internal { /* Compute the xor of src_1 and src_2 and write the result to * dst (which has no container initially). * Result is true iff dst is a bitset */ -static bool array_bitset_container_xor( +bool array_bitset_container_xor( const array_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -17808,7 +13839,7 @@ static bool array_bitset_container_xor( * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
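/*
 * Editor's note (not part of the upstream diff): the in-place array unions above
 * avoid a second buffer with a small trick -- slide the existing values to the
 * tail of the (already large enough) buffer with memmove, then merge from the
 * tail and from the second input back into the front.  A minimal standalone
 * sketch with plain sorted uint16_t arrays (the function name is hypothetical):
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* buf holds n1 sorted values and has room for at least n1 + n2; returns the size
 * of the deduplicated union written back to the front of buf. */
static size_t inplace_sorted_union_u16(uint16_t *buf, size_t n1,
                                       const uint16_t *b, size_t n2) {
    memmove(buf + n2, buf, n1 * sizeof(uint16_t));  /* park the old values at the tail */
    const uint16_t *a = buf + n2;
    size_t i = 0, j = 0, out = 0;
    while (i < n1 && j < n2) {
        if (a[i] < b[j])       buf[out++] = a[i++];
        else if (b[j] < a[i])  buf[out++] = b[j++];
        else { buf[out++] = a[i++]; j++; }          /* common value, keep once */
    }
    while (i < n1) buf[out++] = a[i++];
    while (j < n2) buf[out++] = b[j++];
    return out;                                     /* out <= n1 + n2 */
}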
*/ -static void array_bitset_container_lazy_xor(const array_container_t *src_1, +void array_bitset_container_lazy_xor(const array_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst) { if (src_2 != dst) bitset_container_copy(src_2, dst); @@ -17823,14 +13854,14 @@ static void array_bitset_container_lazy_xor(const array_container_t *src_1, * result true) or an array container. */ -static bool run_bitset_container_xor( +bool run_bitset_container_xor( const run_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ bitset_container_t *result = bitset_container_create(); bitset_container_copy(src_2, result); - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; bitset_flip_range(result->words, rle.value, rle.value + rle.length + UINT32_C(1)); @@ -17851,11 +13882,11 @@ static bool run_bitset_container_xor( * cardinality would dictate an array container. */ -static void run_bitset_container_lazy_xor(const run_container_t *src_1, +void run_bitset_container_lazy_xor(const run_container_t *src_1, const bitset_container_t *src_2, bitset_container_t *dst) { if (src_2 != dst) bitset_container_copy(src_2, dst); - int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; bitset_flip_range(dst->words, rle.value, rle.value + rle.length + UINT32_C(1)); @@ -17867,7 +13898,7 @@ static void run_bitset_container_lazy_xor(const run_container_t *src_1, * can become any kind of container. */ -static int array_run_container_xor( +int array_run_container_xor( const array_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -17912,7 +13943,7 @@ static int array_run_container_xor( * smaller. */ -static void array_run_container_lazy_xor(const array_container_t *src_1, +void array_run_container_lazy_xor(const array_container_t *src_1, const run_container_t *src_2, run_container_t *dst) { run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); @@ -17946,7 +13977,7 @@ static void array_run_container_lazy_xor(const array_container_t *src_1, * can become any kind of container. */ -static int run_run_container_xor( +int run_run_container_xor( const run_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -17965,7 +13996,7 @@ static int run_run_container_xor( * */ -static bool array_array_container_xor( +bool array_array_container_xor( const array_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -17991,7 +14022,7 @@ static bool array_array_container_xor( return returnval; } -static bool array_array_container_lazy_xor( +bool array_array_container_lazy_xor( const array_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -18018,7 +14049,7 @@ static bool array_array_container_lazy_xor( * "dst is a bitset" */ -static bool bitset_bitset_container_xor( +bool bitset_bitset_container_xor( const bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -18041,7 +14072,7 @@ static bool bitset_bitset_container_xor( * cases, the caller is responsible for deallocating dst. 
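/*
 * Editor's note (not part of the upstream diff): the mixed xor kernels in this
 * file back the public symmetric-difference calls.  A short usage sketch,
 * assuming the bundled amalgamated roaring.h:
 */
#include <assert.h>
#include <stdint.h>
#include "roaring.h"

static void xor_example(void) {
    const uint32_t av[] = {1, 2, 3}, bv[] = {3, 4, 5};
    roaring_bitmap_t *a = roaring_bitmap_of_ptr(3, av);
    roaring_bitmap_t *b = roaring_bitmap_of_ptr(3, bv);
    roaring_bitmap_t *d = roaring_bitmap_xor(a, b);     /* {1, 2, 4, 5}         */
    assert(roaring_bitmap_get_cardinality(d) == 4);
    roaring_bitmap_xor_inplace(a, b);                   /* a becomes {1, 2, 4, 5} */
    assert(roaring_bitmap_equals(a, d));
    roaring_bitmap_free(a); roaring_bitmap_free(b); roaring_bitmap_free(d);
}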
* Returns true iff dst is a bitset */ -static bool bitset_array_container_ixor( +bool bitset_array_container_ixor( bitset_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -18062,16 +14093,22 @@ static bool bitset_array_container_ixor( * Anything inplace with a bitset is a good candidate */ -static bool bitset_bitset_container_ixor( +bool bitset_bitset_container_ixor( bitset_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ - bool ans = bitset_bitset_container_xor(src_1, src_2, dst); - bitset_container_free(src_1); - return ans; + int card = bitset_container_xor(src_1, src_2, src_1); + if (card <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset + } else { + *dst = src_1; + return true; + } } -static bool array_bitset_container_ixor( +bool array_bitset_container_ixor( array_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -18087,7 +14124,7 @@ static bool array_bitset_container_ixor( * result true) or an array container. */ -static bool run_bitset_container_ixor( +bool run_bitset_container_ixor( run_container_t *src_1, const bitset_container_t *src_2, container_t **dst ){ @@ -18096,7 +14133,7 @@ static bool run_bitset_container_ixor( return ans; } -static bool bitset_run_container_ixor( +bool bitset_run_container_ixor( bitset_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -18109,7 +14146,7 @@ static bool bitset_run_container_ixor( * can become any kind of container. */ -static int array_run_container_ixor( +int array_run_container_ixor( array_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -18118,7 +14155,7 @@ static int array_run_container_ixor( return ans; } -static int run_array_container_ixor( +int run_array_container_ixor( run_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -18127,7 +14164,7 @@ static int run_array_container_ixor( return ans; } -static bool array_array_container_ixor( +bool array_array_container_ixor( array_container_t *src_1, const array_container_t *src_2, container_t **dst ){ @@ -18136,7 +14173,7 @@ static bool array_array_container_ixor( return ans; } -static int run_run_container_ixor( +int run_run_container_ixor( run_container_t *src_1, const run_container_t *src_2, container_t **dst ){ @@ -18149,1014 +14186,5455 @@ static int run_run_container_ixor( } } } // extern "C" { namespace roaring { namespace internal { #endif /* end file src/containers/mixed_xor.c */ -/* begin file src/bitset_util.c */ -#include <assert.h> -#include <stdint.h> +/* begin file src/containers/run.c */ #include <stdio.h> #include <stdlib.h> -#include <string.h> #ifdef __cplusplus extern "C" { namespace roaring { namespace internal { #endif -#ifdef CROARING_IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 
5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif +extern inline uint16_t run_container_minimum(const run_container_t *run); +extern inline uint16_t run_container_maximum(const run_container_t *run); +extern inline int32_t interleavedBinarySearch(const rle16_t *array, + int32_t lenarray, uint16_t ikey); +extern inline bool run_container_contains(const run_container_t *run, + uint16_t pos); +extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); +extern inline bool run_container_is_full(const run_container_t *run); +extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); +extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); +extern inline run_container_t *run_container_create_range(uint32_t start, + uint32_t stop); +extern inline int run_container_cardinality(const run_container_t *run); -#ifdef CROARING_IS_X64 -ALIGNED(32) -static uint32_t vecDecodeTable[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 
5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 
5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 
0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; -#endif // #ifdef CROARING_IS_X64 +bool run_container_add(run_container_t *run, uint16_t pos) { + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); + if (index >= 0) return false; // already there + index = -index - 2; // points to preceding value, possibly -1 + if (index >= 0) { // possible match + int32_t offset = pos - 
run->runs[index].value; + int32_t le = run->runs[index].length; + if (offset <= le) return false; // already there + if (offset == le + 1) { + // we may need to fuse + if (index + 1 < run->n_runs) { + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index].length = run->runs[index + 1].value + + run->runs[index + 1].length - + run->runs[index].value; + recoverRoomAtIndex(run, (uint16_t)(index + 1)); + return true; + } + } + run->runs[index].length++; + return true; + } + if (index + 1 < run->n_runs) { + // we may need to fuse + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index + 1].value = pos; + run->runs[index + 1].length = run->runs[index + 1].length + 1; + return true; + } + } + } + if (index == -1) { + // we may need to extend the first run + if (0 < run->n_runs) { + if (run->runs[0].value == pos + 1) { + run->runs[0].length++; + run->runs[0].value--; + return true; + } + } + } + makeRoomAtIndex(run, (uint16_t)(index + 1)); + run->runs[index + 1].value = pos; + run->runs[index + 1].length = 0; + return true; +} -#ifdef CROARING_IS_X64 -// same as vecDecodeTable but in 16 bits -ALIGNED(32) -static uint16_t vecDecodeTable_uint16[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 
6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 
7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 
0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; +/* Create a new run container. Return NULL in case of failure. 
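/*
 * Editor's note (not part of the upstream diff): a run container stores sorted
 * rle16_t pairs where {value, length} covers the inclusive interval
 * [value, value + length], so a pair {7, 3} represents 7, 8, 9, 10.  A minimal
 * standalone sketch of that decoding, using a stand-in struct rather than the
 * internal rle16_t:
 */
#include <stdint.h>
#include <stdio.h>

struct rle16_example { uint16_t value; uint16_t length; };

static void print_run(struct rle16_example rle) {
    /* same traversal pattern as the run loops elsewhere in this file */
    uint32_t end = (uint32_t)rle.value + rle.length;   /* widen to avoid 16-bit wrap */
    for (uint32_t v = rle.value; v <= end; ++v) {
        printf("%u\n", v);
    }
}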
*/ +run_container_t *run_container_create_given_capacity(int32_t size) { + run_container_t *run; + /* Allocate the run container itself. */ + if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == NULL) { + return NULL; + } + if (size <= 0 ) { // we don't want to rely on malloc(0) + run->runs = NULL; + } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * size)) == NULL) { + roaring_free(run); + return NULL; + } + run->capacity = size; + run->n_runs = 0; + return run; +} -#endif +int run_container_shrink_to_fit(run_container_t *src) { + if (src->n_runs == src->capacity) return 0; // nothing to do + int savings = src->capacity - src->n_runs; + src->capacity = src->n_runs; + rle16_t *oldruns = src->runs; + src->runs = (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t)); + if (src->runs == NULL) roaring_free(oldruns); // should never happen? + return savings; +} +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create(void) { + return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); +} -#ifdef CROARING_IS_X64 -CROARING_TARGET_AVX2 -size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, - uint32_t *out, size_t outcapacity, - uint32_t base) { - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); ++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); +run_container_t *run_container_clone(const run_container_t *src) { + run_container_t *run = run_container_create_given_capacity(src->capacity); + if (run == NULL) return NULL; + run->capacity = src->capacity; + run->n_runs = src->n_runs; + memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); + return run; +} + +void run_container_offset(const run_container_t *c, + container_t **loc, container_t **hic, + uint16_t offset) { + run_container_t *lo = NULL, *hi = NULL; + + bool split; + int lo_cap, hi_cap; + int top, pivot; + + top = (1 << 16) - offset; + pivot = run_container_index_equalorlarger(c, top); + + if (pivot == -1) { + split = false; + lo_cap = c->n_runs; + hi_cap = 0; + } else { + split = c->runs[pivot].value <= top; + lo_cap = pivot + (split ? 1 : 0); + hi_cap = c->n_runs - pivot; + } + + if (loc && lo_cap) { + lo = run_container_create_given_capacity(lo_cap); + memcpy(lo->runs, c->runs, lo_cap*sizeof(rle16_t)); + lo->n_runs = lo_cap; + for (int i = 0; i < lo_cap; ++i) { + lo->runs[i].value += offset; + } + *loc = (container_t*)lo; + } + + if (hic && hi_cap) { + hi = run_container_create_given_capacity(hi_cap); + memcpy(hi->runs, c->runs+pivot, hi_cap*sizeof(rle16_t)); + hi->n_runs = hi_cap; + for (int i = 0; i < hi_cap; ++i) { + hi->runs[i].value += offset; + } + *hic = (container_t*)hi; + } + + // Fix the split. + if (split) { + if (lo != NULL) { + // Add the missing run to 'lo', exhausting length. + lo->runs[lo->n_runs-1].length = (1 << 16) - lo->runs[lo->n_runs-1].value - 1; + } + + if (hi != NULL) { + // Fix the first run in 'hi'. + hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1; + hi->runs[0].value = 0; + } + } +} + +/* Free memory. 
*/ +void run_container_free(run_container_t *run) { + if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise + roaring_free(run->runs); + run->runs = NULL; // pedantic + } + roaring_free(run); +} + +void run_container_grow(run_container_t *run, int32_t min, bool copy) { + int32_t newCapacity = + (run->capacity == 0) + ? RUN_DEFAULT_INIT_SIZE + : run->capacity < 64 ? run->capacity * 2 + : run->capacity < 1024 ? run->capacity * 3 / 2 + : run->capacity * 5 / 4; + if (newCapacity < min) newCapacity = min; + run->capacity = newCapacity; + assert(run->capacity >= min); + if (copy) { + rle16_t *oldruns = run->runs; + run->runs = + (rle16_t *)roaring_realloc(oldruns, run->capacity * sizeof(rle16_t)); + if (run->runs == NULL) roaring_free(oldruns); + } else { + // Jon Strabala reports that some tools complain otherwise + if (run->runs != NULL) { + roaring_free(run->runs); + } + run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t)); + } + // handle the case where realloc fails + if (run->runs == NULL) { + fprintf(stderr, "could not allocate memory\n"); + } + assert(run->runs != NULL); +} + +/* copy one container into another */ +void run_container_copy(const run_container_t *src, run_container_t *dst) { + const int32_t n_runs = src->n_runs; + if (src->n_runs > dst->capacity) { + run_container_grow(dst, n_runs, false); + } + dst->n_runs = n_runs; + memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); +} + +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void run_container_union(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // TODO: this could be a lot more efficient + + // we start out with inexpensive checks + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + run_container_copy(src_1, dst); + return; + } + if (if2) { + run_container_copy(src_2, dst); + return; + } + } + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + dst->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + + rle16_t previousrle; + if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { + previousrle = run_container_append_first(dst, src_1->runs[rlepos]); + rlepos++; + } else { + previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); + xrlepos++; + } + + while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { + rle16_t newrl; + if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { + newrl = src_1->runs[rlepos]; + rlepos++; } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; + newrl = src_2->runs[xrlepos]; + xrlepos++; + } + run_container_append(dst, newrl, &previousrle); + } + while (xrlepos < src_2->n_runs) { + 
run_container_append(dst, src_2->runs[xrlepos], &previousrle); + xrlepos++; + } + while (rlepos < src_1->n_runs) { + run_container_append(dst, src_1->runs[rlepos], &previousrle); + rlepos++; + } +} + +/* Compute the union of `src_1' and `src_2' and write the result to `src_1' + */ +void run_container_union_inplace(run_container_t *src_1, + const run_container_t *src_2) { + // TODO: this could be a lot more efficient + + // we start out with inexpensive checks + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return; + } + if (if2) { + run_container_copy(src_2, src_1); + return; + } + } + // we move the data to the end of the current array + const int32_t maxoutput = src_1->n_runs + src_2->n_runs; + const int32_t neededcapacity = maxoutput + src_1->n_runs; + if (src_1->capacity < neededcapacity) + run_container_grow(src_1, neededcapacity, true); + memmove(src_1->runs + maxoutput, src_1->runs, + src_1->n_runs * sizeof(rle16_t)); + rle16_t *inputsrc1 = src_1->runs + maxoutput; + const int32_t input1nruns = src_1->n_runs; + src_1->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + + rle16_t previousrle; + if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { + previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); + rlepos++; + } else { + previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); + xrlepos++; + } + while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { + rle16_t newrl; + if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { + newrl = inputsrc1[rlepos]; + rlepos++; + } else { + newrl = src_2->runs[xrlepos]; + xrlepos++; + } + run_container_append(src_1, newrl, &previousrle); + } + while (xrlepos < src_2->n_runs) { + run_container_append(src_1, src_2->runs[xrlepos], &previousrle); + xrlepos++; + } + while (rlepos < input1nruns) { + run_container_append(src_1, inputsrc1[rlepos], &previousrle); + rlepos++; + } +} + +/* Compute the symmetric difference of `src_1' and `src_2' and write the result + * to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void run_container_xor(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // don't bother to convert xor with full range into negation + // since negation is implemented similarly + + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + + int32_t pos1 = 0; + int32_t pos2 = 0; + dst->n_runs = 0; + + while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { + if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; + } else { + run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } + } + while (pos1 < src_1->n_runs) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; + } + + while (pos2 < src_2->n_runs) { + run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } +} + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. 
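 * Both run lists are walked once in sorted order and only the overlapping
 * portions of the runs are written to `dst'.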
*/ +void run_container_intersection(const run_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + run_container_copy(src_2, dst); + return; + } + if (if2) { + run_container_copy(src_1, dst); + return; + } + } + // TODO: this could be a lot more efficient, could use SIMD optimizations + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + dst->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + const int32_t lateststart = start > xstart ? start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } + dst->runs[dst->n_runs].value = (uint16_t)lateststart; + dst->runs[dst->n_runs].length = + (uint16_t)(earliestend - lateststart - 1); + dst->n_runs++; } } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; +} + +/* Compute the size of the intersection of src_1 and src_2 . 
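 * Uses the same one-pass merge as run_container_intersection, but only
 * accumulates the cardinality of the overlap.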
*/ +int run_container_intersection_cardinality(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return run_container_cardinality(src_2); + } + if (if2) { + return run_container_cardinality(src_1); } - base += 64; } - return out - initout; + int answer = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + const int32_t lateststart = start > xstart ? start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } + answer += earliestend - lateststart; + } + } + return answer; } -CROARING_UNTARGET_REGION -#endif // CROARING_IS_X64 -size_t bitset_extract_setbits(const uint64_t *words, size_t length, - uint32_t *out, uint32_t base) { +bool run_container_intersect(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return !run_container_empty(src_2); + } + if (if2) { + return !run_container_empty(src_1); + } + } + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + return true; + } + } + return false; +} + + +/* Compute the difference of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. 
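 * Runs of `src_1' are emitted after being clipped or split around the
 * runs of `src_2'.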
*/ +void run_container_andnot(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // following Java implementation as of June 2016 + + if (dst->capacity < src_1->n_runs + src_2->n_runs) + run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); + + dst->n_runs = 0; + + int rlepos1 = 0; + int rlepos2 = 0; + int32_t start = src_1->runs[rlepos1].value; + int32_t end = start + src_1->runs[rlepos1].length + 1; + int32_t start2 = src_2->runs[rlepos2].value; + int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; + + while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { + if (end <= start2) { + // output the first run + dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } + } else if (end2 <= start) { + // exit the second run + rlepos2++; + if (rlepos2 < src_2->n_runs) { + start2 = src_2->runs[rlepos2].value; + end2 = start2 + src_2->runs[rlepos2].length + 1; + } + } else { + if (start < start2) { + dst->runs[dst->n_runs++] = + MAKE_RLE16(start, start2 - start - 1); + } + if (end2 < end) { + start = end2; + } else { + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } + } + } + } + if (rlepos1 < src_1->n_runs) { + dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); + rlepos1++; + if (rlepos1 < src_1->n_runs) { + memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, + sizeof(rle16_t) * (src_1->n_runs - rlepos1)); + dst->n_runs += src_1->n_runs - rlepos1; + } + } +} + +ALLOW_UNALIGNED +int run_container_to_uint32_array(void *vout, const run_container_t *cont, + uint32_t base) { int outpos = 0; - size_t i; for (i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; + uint32_t *out = (uint32_t *)vout; + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (int j = 0; j <= le; ++j) { + uint32_t val = run_start + j; memcpy(out + outpos, &val, sizeof(uint32_t)); // should be compiled as a MOV on x64 outpos++; - w ^= t; } - base += 64; } return outpos; } -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, - const uint64_t * __restrict__ words2, - size_t length, uint16_t *out, - uint16_t base) { - int outpos = 0; - size_t i; for (i = 0; i < length; ++i) { - uint64_t w = words1[i] & words2[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; +/* + * Print this container using printf (useful for debugging). + */ +void run_container_printf(const run_container_t *cont) { + for (int i = 0; i < cont->n_runs; ++i) { + uint16_t run_start = cont->runs[i].value; + uint16_t le = cont->runs[i].length; + printf("[%d,%d]", run_start, run_start + le); } - return outpos; } -#ifdef CROARING_IS_X64 /* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. 
- * - * This function uses SSE decoding. + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. */ -CROARING_TARGET_AVX2 -static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; +void run_container_printf_as_uint32_array(const run_container_t *cont, + uint32_t base) { + if (cont->n_runs == 0) return; + { + uint32_t run_start = base + cont->runs[0].value; + uint16_t le = cont->runs[0].length; + printf("%u", run_start); + for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); + } + for (int32_t i = 1; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); + } +} + +int32_t run_container_write(const run_container_t *container, char *buf) { + uint16_t cast_16 = container->n_runs; + memcpy(buf, &cast_16, sizeof(uint16_t)); + memcpy(buf + sizeof(uint16_t), container->runs, + container->n_runs * sizeof(rle16_t)); + return run_container_size_in_bytes(container); +} + +int32_t run_container_read(int32_t cardinality, run_container_t *container, + const char *buf) { + (void)cardinality; + uint16_t cast_16; + memcpy(&cast_16, buf, sizeof(uint16_t)); + container->n_runs = cast_16; + if (container->n_runs > container->capacity) + run_container_grow(container, container->n_runs, false); + if(container->n_runs > 0) { + memcpy(container->runs, buf + sizeof(uint16_t), + container->n_runs * sizeof(rle16_t)); + } + return run_container_size_in_bytes(container); +} + +bool run_container_iterate(const run_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr) { + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + + for (int j = 0; j <= le; ++j) + if (!iterator(run_start + j, ptr)) return false; + } + return true; +} + +bool run_container_iterate64(const run_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void *ptr) { + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + + for (int j = 0; j <= le; ++j) + if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) + return false; + } + return true; +} + +bool run_container_is_subset(const run_container_t *container1, + const 
run_container_t *container2) { + int i1 = 0, i2 = 0; + while (i1 < container1->n_runs && i2 < container2->n_runs) { + int start1 = container1->runs[i1].value; + int stop1 = start1 + container1->runs[i1].length; + int start2 = container2->runs[i2].value; + int stop2 = start2 + container2->runs[i2].length; + if (start1 < start2) { + return false; + } else { // start1 >= start2 + if (stop1 < stop2) { + i1++; + } else if (stop1 == stop2) { + i1++; + i2++; + } else { // stop1 > stop2 + i2++; } } } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - *out = r + base; - out++; - w ^= t; + if (i1 == container1->n_runs) { + return true; + } else { + return false; + } +} + +// TODO: write smart_append_exclusive version to match the overloaded 1 param +// Java version (or is it even used?) + +// follows the Java implementation closely +// length is the rle-value. Ie, run [10,12) uses a length value 1. +void run_container_smart_append_exclusive(run_container_t *src, + const uint16_t start, + const uint16_t length) { + int old_end; + rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL; + rle16_t *appended_last_run = src->runs + src->n_runs; + + if (!src->n_runs || + (start > (old_end = last_run->value + last_run->length + 1))) { + *appended_last_run = MAKE_RLE16(start, length); + src->n_runs++; + return; + } + if (old_end == start) { + // we merge + last_run->length += (length + 1); + return; + } + int new_end = start + length + 1; + + if (start == last_run->value) { + // wipe out previous + if (new_end < old_end) { + *last_run = MAKE_RLE16(new_end, old_end - new_end - 1); + return; + } else if (new_end > old_end) { + *last_run = MAKE_RLE16(old_end, new_end - old_end - 1); + return; + } else { + src->n_runs--; + return; } - base += 64; } - return out - initout; + last_run->length = start - last_run->value - 1; + if (new_end < old_end) { + *appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1); + src->n_runs++; + } else if (new_end > old_end) { + *appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1); + src->n_runs++; + } } + +bool run_container_select(const run_container_t *container, + uint32_t *start_rank, uint32_t rank, + uint32_t *element) { + for (int i = 0; i < container->n_runs; i++) { + uint16_t length = container->runs[i].length; + if (rank <= *start_rank + length) { + uint16_t value = container->runs[i].value; + *element = value + rank - (*start_rank); + return true; + } else + *start_rank += length + 1; + } + return false; +} + +int run_container_rank(const run_container_t *container, uint16_t x) { + int sum = 0; + uint32_t x32 = x; + for (int i = 0; i < container->n_runs; i++) { + uint32_t startpoint = container->runs[i].value; + uint32_t length = container->runs[i].length; + uint32_t endpoint = length + startpoint; + if (x <= endpoint) { + if (x < startpoint) break; + return sum + (x32 - startpoint) + 1; + } else { + sum += length + 1; + } + } + return sum; +} + +#ifdef CROARING_IS_X64 + +CROARING_TARGET_AVX2 +ALLOW_UNALIGNED +/* Get the cardinality of `run'. Requires an actual computation. */ +static inline int _avx2_run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. 
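 * (an rle16_t run of length L covers L+1 consecutive values)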
*/ + int sum = n_runs; + int32_t k = 0; + const int32_t step = sizeof(__m256i) / sizeof(rle16_t); + if (n_runs > step) { + __m256i total = _mm256_setzero_si256(); + for (; k + step <= n_runs; k += step) { + __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); + __m256i justlengths = _mm256_srli_epi32(ymm1, 16); + total = _mm256_add_epi32(total, justlengths); + } + // a store might be faster than extract? + uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; + _mm256_storeu_si256((__m256i *)buffer, total); + sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + + (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); + } + for (; k < n_runs; ++k) { + sum += runs[k].length; + } + + return sum; +} + CROARING_UNTARGET_REGION + +/* Get the cardinality of `run'. Requires an actual computation. */ +static inline int _scalar_run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + for (int k = 0; k < n_runs; ++k) { + sum += runs[k].length; + } + + return sum; +} + +int run_container_cardinality(const run_container_t *run) { + if( croaring_avx2() ) { + return _avx2_run_container_cardinality(run); + } else { + return _scalar_run_container_cardinality(run); + } +} +#else + +/* Get the cardinality of `run'. Requires an actual computation. */ +int run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + for (int k = 0; k < n_runs; ++k) { + sum += runs[k].length; + } + + return sum; +} #endif -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" (can be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/run.c */ +/* begin file src/memory.c */ +#include <stdlib.h> + +// without the following, we get lots of warnings about posix_memalign +#ifndef __cplusplus +extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#endif //__cplusplus // C++ does not have a well defined signature + +// portable version of posix_memalign +static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { + void *p; +#ifdef _MSC_VER + p = _aligned_malloc(size, alignment); +#elif defined(__MINGW32__) || defined(__MINGW64__) + p = __mingw_aligned_malloc(size, alignment); +#else + // somehow, if this is used before including "x86intrin.h", it creates an + // implicit defined warning. 
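    // posix_memalign() stores the allocation in `p' and returns 0 on success,
    // so any non-zero return value is treated as an allocation failure here.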
+ if (posix_memalign(&p, alignment, size) != 0) return NULL; +#endif + return p; +} + +static void roaring_bitmap_aligned_free(void *memblock) { +#ifdef _MSC_VER + _aligned_free(memblock); +#elif defined(__MINGW32__) || defined(__MINGW64__) + __mingw_aligned_free(memblock); +#else + free(memblock); +#endif +} + +static roaring_memory_t global_memory_hook = { + .malloc = malloc, + .realloc = realloc, + .calloc = calloc, + .free = free, + .aligned_malloc = roaring_bitmap_aligned_malloc, + .aligned_free = roaring_bitmap_aligned_free, +}; + +void roaring_init_memory_hook(roaring_memory_t memory_hook) { + global_memory_hook = memory_hook; +} + +void* roaring_malloc(size_t n) { + return global_memory_hook.malloc(n); +} + +void* roaring_realloc(void* p, size_t new_sz) { + return global_memory_hook.realloc(p, new_sz); +} + +void* roaring_calloc(size_t n_elements, size_t element_size) { + return global_memory_hook.calloc(n_elements, element_size); +} + +void roaring_free(void* p) { + global_memory_hook.free(p); +} + +void* roaring_aligned_malloc(size_t alignment, size_t size) { + return global_memory_hook.aligned_malloc(alignment, size); +} + +void roaring_aligned_free(void* p) { + global_memory_hook.aligned_free(p); +} +/* end file src/memory.c */ +/* begin file src/roaring.c */ +#include <assert.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + + + +#ifdef __cplusplus +using namespace ::roaring::internal; + +extern "C" { namespace roaring { namespace api { +#endif + +#define CROARING_SERIALIZATION_ARRAY_UINT32 1 +#define CROARING_SERIALIZATION_CONTAINER 2 + +extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r); +extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow); + +static inline bool is_cow(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline bool is_frozen(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_FROZEN; +} + +// this is like roaring_bitmap_add, but it populates pointer arguments in such a +// way +// that we can recover the container touched, which, in turn can be used to +// accelerate some functions (when you repeatedly need to add to the same +// container) +static inline container_t *containerptr_roaring_bitmap_add( + roaring_bitmap_t *r, uint32_t val, + uint8_t *type, int *index +){ + roaring_array_t *ra = &r->high_low_container; + + uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + if (i >= 0) { + ra_unshare_container_at_index(ra, i); + container_t *c = ra_get_container_at_index(ra, i, type); + uint8_t new_type = *type; + container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); + *index = i; + if (c2 != c) { + container_free(c, *type); + ra_set_container_at_index(ra, i, c2, new_type); + *type = new_type; + return c2; + } else { + return c; + } + } else { + array_container_t *new_ac = array_container_create(); + container_t *c = container_add(new_ac, val & 0xFFFF, + ARRAY_CONTAINER_TYPE, type); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); + *index = -i - 1; + return c; + } +} + +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); + if (!is_ok) { + roaring_free(ans); + return NULL; + 
} + return ans; +} + +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { + return ra_init_with_capacity(&r->high_low_container, cap); +} + +static inline void add_bulk_impl(roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + uint8_t typecode; + int idx; + context->container = containerptr_roaring_bitmap_add( + r, val, &typecode, &idx); + context->typecode = typecode; + context->idx = idx; + context->key = key; + } else { + // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion directly, bypassing the roaring_bitmap_add call + uint8_t new_typecode; + container_t *container2 = container_add( + context->container, val & 0xFFFF, context->typecode, &new_typecode); + if (container2 != context->container) { + // rare instance when we need to change the container type + container_free(context->container, context->typecode); + ra_set_container_at_index(&r->high_low_container, context->idx, + container2, new_typecode); + context->typecode = new_typecode; + context->container = container2; + } + } +} + +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + uint32_t val; + const uint32_t *start = vals; + const uint32_t *end = vals + n_args; + const uint32_t *current_val = start; + + if (n_args == 0) { + return; + } + + uint8_t typecode; + int idx; + container_t *container; + val = *current_val; + container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode}; + + for (; current_val != end; current_val++) { + memcpy(&val, current_val, sizeof(val)); + add_bulk_impl(r, &context, val); + } +} + +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val) { + add_bulk_impl(r, context, val); +} + +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) +{ + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + int32_t start_idx = -1; + if (context->container != NULL && context->key < key) { + start_idx = context->idx; + } + int idx = ra_advance_until(&r->high_low_container, key, start_idx); + if (idx == ra_get_size(&r->high_low_container)) { + return false; + } + uint8_t typecode; + context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode); + context->typecode = typecode; + context->idx = idx; + context->key = ra_get_key_at_index(&r->high_low_container, idx); + // ra_advance_until finds the next key >= the target, we found a later container. + if (context->key != key) { + return false; + } + } + // context is now set up + return container_contains(context->container, val & 0xFFFF, context->typecode); +} + +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bitmap_add_many(answer, n_args, vals); + return answer; +} + +roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ + // todo: could be greatly optimized but we do not expect this call to ever + // include long lists + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bulk_context_t context; + va_list ap; + + memset(&context, 0, sizeof(context)); + va_start(ap, n_args); + for (size_t i = 0; i < n_args; i++) { + uint32_t val = va_arg(ap, uint32_t); + roaring_bitmap_add_bulk(answer, &context, val); + } + va_end(ap); + return answer; +} + +static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { + return (a < b) ? a : b; +} + +static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { + return (a < b) ? a : b; +} + +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step) { + if(max >= UINT64_C(0x100000000)) { + max = UINT64_C(0x100000000); + } + if (step == 0) return NULL; + if (max <= min) return NULL; + roaring_bitmap_t *answer = roaring_bitmap_create(); + if (step >= (1 << 16)) { + for (uint32_t value = (uint32_t)min; value < max; value += step) { + roaring_bitmap_add(answer, value); + } + return answer; + } + uint64_t min_tmp = min; + do { + uint32_t key = (uint32_t)min_tmp >> 16; + uint32_t container_min = min_tmp & 0xFFFF; + uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); + uint8_t type; + container_t *container = container_from_range(&type, container_min, + container_max, (uint16_t)step); + ra_append(&answer->high_low_container, key, container, type); + uint32_t gap = container_max - container_min + step - 1; + min_tmp += gap - (gap % step); + } while (min_tmp < max); + // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step + return answer; +} + +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { + if (min > max) { + return; + } + + roaring_array_t *ra = &r->high_low_container; + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t num_required_containers = max_key - min_key + 1; + int32_t suffix_length = count_greater(ra->keys, ra->size, max_key); + int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length, + min_key); + int32_t common_length = ra->size - prefix_length - suffix_length; + + if (num_required_containers > common_length) { + ra_shift_tail(ra, suffix_length, + num_required_containers - common_length); + } + + int32_t src = prefix_length + common_length - 1; + int32_t dst = ra->size - suffix_length - 1; + for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 + uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == key) ? 
(max & 0xffff) : 0xffff; + container_t* new_container; + uint8_t new_type; + + if (src >= 0 && ra->keys[src] == key) { + ra_unshare_container_at_index(ra, src); + new_container = container_add_range(ra->containers[src], + ra->typecodes[src], + container_min, container_max, + &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], + ra->typecodes[src]); + } + src--; + } else { + new_container = container_from_range(&new_type, container_min, + container_max+1, 1); + } + ra_replace_key_and_container_at_index(ra, dst, key, new_container, + new_type); + dst--; + } +} + +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { + if (min > max) { + return; + } + + roaring_array_t *ra = &r->high_low_container; + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t src = count_less(ra->keys, ra->size, min_key); + int32_t dst = src; + while (src < ra->size && ra->keys[src] <= max_key) { + uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff; + ra_unshare_container_at_index(ra, src); + container_t *new_container; + uint8_t new_type; + new_container = container_remove_range(ra->containers[src], + ra->typecodes[src], + container_min, container_max, + &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], + ra->typecodes[src]); + } + if (new_container) { + ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], + new_container, new_type); + dst++; + } + src++; + } + if (src > dst) { + ra_shift_tail(ra, ra->size - src, dst - src); + } +} + +extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); +extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); + +void roaring_bitmap_printf(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; + + printf("{"); + for (int i = 0; i < ra->size; ++i) { + container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + + if (i + 1 < ra->size) { + printf(","); + } + } + printf("}"); +} + +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; + + printf("{"); + for (int i = 0; i < ra->size; ++i) { + printf("%d: %s (%d)", ra->keys[i], + get_full_container_name(ra->containers[i], ra->typecodes[i]), + container_get_cardinality(ra->containers[i], ra->typecodes[i])); + if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { + printf( + "(shared count = %" PRIu32 " )", + CAST_shared(ra->containers[i])->counter); + } + + if (i + 1 < ra->size) { + printf(", "); + } + } + printf("}"); +} + +typedef struct min_max_sum_s { + uint32_t min; + uint32_t max; + uint64_t sum; +} min_max_sum_t; + +static bool min_max_sum_fnc(uint32_t value, void *param) { + min_max_sum_t *mms = (min_max_sum_t *)param; + if (value > mms->max) mms->max = value; + if (value < mms->min) mms->min = value; + mms->sum += value; + return true; // we always process all data points +} + +/** +* (For advanced users.) 
+* Collect statistics about the bitmap +*/ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, + roaring_statistics_t *stat) { + const roaring_array_t *ra = &r->high_low_container; + + memset(stat, 0, sizeof(*stat)); + stat->n_containers = ra->size; + stat->cardinality = roaring_bitmap_get_cardinality(r); + min_max_sum_t mms; + mms.min = UINT32_C(0xFFFFFFFF); + mms.max = UINT32_C(0); + mms.sum = 0; + roaring_iterate(r, &min_max_sum_fnc, &mms); + stat->min_value = mms.min; + stat->max_value = mms.max; + stat->sum_value = mms.sum; + + for (int i = 0; i < ra->size; ++i) { + uint8_t truetype = + get_container_type(ra->containers[i], ra->typecodes[i]); + uint32_t card = + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + uint32_t sbytes = + container_size_in_bytes(ra->containers[i], ra->typecodes[i]); + switch (truetype) { + case BITSET_CONTAINER_TYPE: + stat->n_bitset_containers++; + stat->n_values_bitset_containers += card; + stat->n_bytes_bitset_containers += sbytes; + break; + case ARRAY_CONTAINER_TYPE: + stat->n_array_containers++; + stat->n_values_array_containers += card; + stat->n_bytes_array_containers += sbytes; + break; + case RUN_CONTAINER_TYPE: + stat->n_run_containers++; + stat->n_values_run_containers += card; + stat->n_bytes_run_containers += sbytes; + break; + default: + assert(false); + __builtin_unreachable(); + } + } +} + +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + if (!ra_init_with_capacity( // allocation of list of containers can fail + &ans->high_low_container, r->high_low_container.size) + ){ + roaring_free(ans); + return NULL; + } + if (!ra_overwrite( // memory allocation of individual containers may fail + &r->high_low_container, &ans->high_low_container, is_cow(r)) + ){ + roaring_bitmap_free(ans); // overwrite should leave in freeable state + return NULL; + } + roaring_bitmap_set_copy_on_write(ans, is_cow(r)); + return ans; +} + +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, + const roaring_bitmap_t *src) { + roaring_bitmap_set_copy_on_write(dest, is_cow(src)); + return ra_overwrite(&src->high_low_container, &dest->high_low_container, + is_cow(src)); +} + +void roaring_bitmap_free(const roaring_bitmap_t *r) { + if (!is_frozen(r)) { + ra_clear((roaring_array_t*)&r->high_low_container); + } + roaring_free((roaring_bitmap_t*)r); +} + +void roaring_bitmap_clear(roaring_bitmap_t *r) { + ra_reset(&r->high_low_container); +} + +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { + roaring_array_t *ra = &r->high_low_container; + + const uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + uint8_t typecode; + if (i >= 0) { + ra_unshare_container_at_index(ra, i); + container_t *container = + ra_get_container_at_index(ra, i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + } else { + array_container_t *newac = array_container_create(); + container_t *container = container_add(newac, val & 0xFFFF, + ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); + } +} + +bool roaring_bitmap_add_checked(roaring_bitmap_t 
*r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, i); + container_t *container = + ra_get_container_at_index(&r->high_low_container, i, &typecode); + + const int oldCardinality = + container_get_cardinality(container, typecode); + + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + result = true; + } else { + const int newCardinality = + container_get_cardinality(container, newtypecode); + + result = oldCardinality != newCardinality; + } + } else { + array_container_t *newac = array_container_create(); + container_t *container = container_add(newac, val & 0xFFFF, + ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); + result = true; + } + + return result; +} + +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, i); + container_t *container = + ra_get_container_at_index(&r->high_low_container, i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + if (container_get_cardinality(container2, newtypecode) != 0) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } else { + ra_remove_at_index_and_free(&r->high_low_container, i); + } + } +} + +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, i); + container_t *container = + ra_get_container_at_index(&r->high_low_container, i, &typecode); + + const int oldCardinality = + container_get_cardinality(container, typecode); + + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + + const int newCardinality = + container_get_cardinality(container2, newtypecode); + + if (newCardinality != 0) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } else { + ra_remove_at_index_and_free(&r->high_low_container, i); + } + + result = oldCardinality != newCardinality; + } + return result; +} + +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + if (n_args == 0 || r->high_low_container.size == 0) { + return; + } + int32_t pos = -1; // position of the container used in the previous iteration + for (size_t i = 0; i < n_args; i++) { + uint16_t key = (uint16_t)(vals[i] >> 16); + if (pos < 0 || key != r->high_low_container.keys[pos]) 
{ + pos = ra_get_index(&r->high_low_container, key); + } + if (pos >= 0) { + uint8_t new_typecode; + container_t *new_container; + new_container = container_remove(r->high_low_container.containers[pos], + vals[i] & 0xffff, + r->high_low_container.typecodes[pos], + &new_typecode); + if (new_container != r->high_low_container.containers[pos]) { + container_free(r->high_low_container.containers[pos], + r->high_low_container.typecodes[pos]); + ra_replace_key_and_container_at_index(&r->high_low_container, + pos, key, new_container, + new_typecode); + } + if (!container_nonzero_cardinality(new_container, new_typecode)) { + container_free(new_container, new_typecode); + ra_remove_at_index(&r->high_low_container, pos); + pos = -1; + } + } + } +} + +// there should be some SIMD optimizations possible here +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint32_t neededcap = length1 > length2 ? length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_and(c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); // otherwise: memory leak! + } + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + } + return answer; +} + +/** + * Compute the union of 'number' bitmaps. */ -static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - size_t i; for (i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); + } + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_bitmap_t *answer = + roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); + } + roaring_bitmap_repair_after_lazy(answer); + return answer; +} + +/** + * Compute the xor of 'number' bitmaps. 
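 * Internally this chains lazy xors over the inputs and repairs the containers
 * once at the end.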
+ */ +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); + } + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_xor_inplace(answer, x[i]); + } + roaring_bitmap_repair_after_lazy(answer); + return answer; +} + +// inplace and (modifies its first argument). +void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + if (x1 == x2) return; + int pos1 = 0, pos2 = 0, intersection_size = 0; + const int length1 = ra_get_size(&x1->high_low_container); + const int length2 = ra_get_size(&x2->high_low_container); + + // any skipped-over or newly emptied containers in x1 + // have to be freed. + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + if (s1 == s2) { + uint8_t type1, type2, result_type; + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared container. + // Rationale: using a shared container safely with in place computation would + // require making a copy and then doing the computation in place which is likely + // less efficient than avoiding in place entirely and always generating a new + // container. + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? container_and(c1, type1, c2, type2, &result_type) + : container_iand(c1, type1, c2, type2, &result_type); + + if (c != c1) { // in this instance a new container was created, and + // we need to free the old one + container_free(c1, type1); + } + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size, s1, c, + result_type); + intersection_size++; + } else { + container_free(c, result_type); + } + ++pos1; + ++pos2; + } else if (s1 < s2) { + pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } - base += 64; } - return outpos; + + // if we ended early because x2 ran out, then all remaining in x1 should be + // freed + while (pos1 < length1) { + container_free(x1->high_low_container.containers[pos1], + x1->high_low_container.typecodes[pos1]); + ++pos1; + } + + // all containers after this have either been copied or freed + ra_downsize(&x1->high_low_container, intersection_size); } -#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = 
ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_or(c1, type1, c2, type2, &result_type); -static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)); - return card; + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + // c1 = container_clone(c1, type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + // c2 = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; } -static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; +// inplace or (modifies its first argument). 
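// A minimal usage sketch (illustrative only, not part of the library; it relies
// solely on public functions defined above such as roaring_bitmap_of and
// roaring_bitmap_free):
//
//     roaring_bitmap_t *acc   = roaring_bitmap_of(2, 10u, 20u);
//     roaring_bitmap_t *extra = roaring_bitmap_of(2, 20u, 30u);
//     roaring_bitmap_or_inplace(acc, extra);  // acc now holds {10, 20, 30}
//     roaring_bitmap_free(extra);
//     roaring_bitmap_free(acc);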
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + if (!container_is_full(c1, type1)) { + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? 
container_or(c1, type1, c2, type2, &result_type) + : container_ior(c1, type1, c2, type2, &result_type); - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - list++; + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + + // container_t *c2_clone = container_clone(c2, type2); + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); } } -static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_xor(c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, 
result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; } -static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; +// inplace xor (modifies its first argument). + +void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } + + // XOR can have new containers inserted from x2, but can also + // lose containers when x1 and x2 are nonempty and identical. + + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared container. + // Rationale: using a shared container safely with in place computation would + // require making a copy and then doing the computation in place which is likely + // less efficient than avoiding in place entirely and always generating a new + // container. 
+ + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_xor(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // so release + } + else { + c = container_ixor(c1, type1, c2, type2, &result_type); + } + + if (container_nonzero_cardinality(c, result_type)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + ++pos1; + } else { + container_free(c, result_type); + ra_remove_at_index(&x1->high_low_container, pos1); + --length1; + } + + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } +} + +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2)); + return empty_bitmap; + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = 0; + uint16_t s2 = 0; + while (true) { + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_andnot(c1, type1, c2, type2, + &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + } else if (s1 < s2) { // s1 < s2 + const int next_pos1 = + ra_advance_until(&x1->high_low_container, s2, pos1); + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, next_pos1, + is_cow(x1)); + // TODO : perhaps some of the copy_on_write should be based on + // answer rather than x1 (more stringent?). Many similar cases + pos1 = next_pos1; + if (pos1 == length1) break; + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + if (pos2 == length2) break; + } + } + if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} + +// inplace andnot (modifies its first argument). 
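/*
 * Illustrative usage sketch, not taken from the patch itself: assumes the
 * public roaring.h API (roaring_bitmap_create/_add/_free are defined elsewhere
 * in the amalgamation) and an example-only helper name. It exercises the
 * xor/andnot routines above and the in-place difference defined below.
 */
#include <assert.h>
#include <stdint.h>
#include "roaring.h"   /* include path depends on how the amalgamation is vendored */

static void example_xor_andnot(void) {
    roaring_bitmap_t *a = roaring_bitmap_create();
    roaring_bitmap_t *b = roaring_bitmap_create();
    for (uint32_t v = 0; v < 100; v++) roaring_bitmap_add(a, v);
    for (uint32_t v = 50; v < 150; v++) roaring_bitmap_add(b, v);

    roaring_bitmap_t *sym  = roaring_bitmap_xor(a, b);    /* {0..49} union {100..149} */
    roaring_bitmap_t *diff = roaring_bitmap_andnot(a, b); /* {0..49}                  */
    assert(roaring_bitmap_get_cardinality(sym) == 100);
    assert(roaring_bitmap_get_cardinality(diff) == 50);

    roaring_bitmap_andnot_inplace(a, b);                  /* a becomes {0..49}        */
    assert(roaring_bitmap_equals(a, diff));

    roaring_bitmap_free(diff);
    roaring_bitmap_free(sym);
    roaring_bitmap_free(b);
    roaring_bitmap_free(a);
}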
+ +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + int intersection_size = 0; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_clear(x1); + return; + } + + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared container. + // Rationale: using a shared container safely with in place computation would + // require making a copy and then doing the computation in place which is likely + // less efficient than avoiding in place entirely and always generating a new + // container. + + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_andnot(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // release + } + else { + c = container_iandnot(c1, type1, c2, type2, &result_type); + } + + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size++, s1, + c, result_type); + } else { + container_free(c, result_type); + } + + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + if (pos1 != intersection_size) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size, s1, c1, + type1); + } + intersection_size++; + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + + if (pos1 < length1) { + // all containers between intersection_size and + // pos1 are junk. However, they have either been moved + // (thus still referenced) or involved in an iandnot + // that will clean up all containers that could not be reused. + // Thus we should not free the junk containers between + // intersection_size and pos1. 
+ if (pos1 > intersection_size) { + // left slide of remaining items + ra_copy_range(&x1->high_low_container, pos1, length1, + intersection_size); + } + // else current placement is fine + intersection_size += (length1 - pos1); } + ra_downsize(&x1->high_low_container, intersection_size); +} + +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; + + uint64_t card = 0; + for (int i = 0; i < ra->size; ++i) + card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); return card; } -static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end) { + const roaring_array_t *ra = &r->high_low_container; + + if (range_end > UINT32_MAX) { + range_end = UINT32_MAX + UINT64_C(1); + } + if (range_start >= range_end) { + return 0; } + range_end--; // make range_end inclusive + // now we have: 0 <= range_start <= range_end <= UINT32_MAX + + uint16_t minhb = range_start >> 16; + uint16_t maxhb = range_end >> 16; + + uint64_t card = 0; + + int i = ra_get_index(ra, minhb); + if (i >= 0) { + if (minhb == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); + } else { + card += container_get_cardinality(ra->containers[i], + ra->typecodes[i]); + } + if ((range_start & 0xffff) != 0) { + card -= container_rank(ra->containers[i], ra->typecodes[i], + (range_start & 0xffff) - 1); + } + i++; + } else { + i = -i - 1; + } + + for (; i < ra->size; i++) { + uint16_t key = ra->keys[i]; + if (key < maxhb) { + card += container_get_cardinality(ra->containers[i], + ra->typecodes[i]); + } else if (key == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); + break; + } else { + break; + } + } + return card; } -static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; + +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { + return r->high_low_container.size == 0; +} + +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { + ra_to_uint32_array(&r->high_low_container, ans); +} + +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, + size_t offset, size_t limit, + uint32_t *ans) { + return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); +} + +/** convert array and bitmap containers to run containers when it is more + * efficient; + * also convert from run containers when more space efficient. Returns + * true if the result has at least one run container. 
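 *
 * (Illustrative note added for this write-up rather than taken from upstream:
 *  a common post-construction pattern is
 *      roaring_bitmap_run_optimize(r);
 *      roaring_bitmap_shrink_to_fit(r);
 *  which first picks the cheaper container encodings and then trims the
 *  allocations, as the two functions below implement.)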
+*/ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + ra_unshare_container_at_index( + &r->high_low_container, i); // TODO: this introduces extra cloning! + container_t *c = ra_get_container_at_index(&r->high_low_container, i, + &type_original); + container_t *c1 = convert_run_optimize(c, type_original, &type_after); + if (type_after == RUN_CONTAINER_TYPE) { + answer = true; + } + ra_set_container_at_index(&r->high_low_container, i, c1, type_after); } + return answer; } -static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - if( croaring_avx2() ) { - return _asm_bitset_clear_list(words, card, list, length); +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { + size_t answer = 0; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original; + container_t *c = ra_get_container_at_index(&r->high_low_container, i, + &type_original); + answer += container_shrink_to_fit(c, type_original); + } + answer += ra_shrink_to_fit(&r->high_low_container); + return answer; +} + +/** + * Remove run-length encoding even when it is more space efficient + * return whether a change was applied + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + container_t *c = ra_get_container_at_index(&r->high_low_container, i, + &type_original); + if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { + answer = true; + if (type_original == SHARED_CONTAINER_TYPE) { + run_container_t *truec = CAST_run(CAST_shared(c)->container); + int32_t card = run_container_cardinality(truec); + container_t *c1 = convert_to_bitset_or_array_container( + truec, card, &type_after); + shared_container_free(CAST_shared(c)); // frees run as needed + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); + + } else { + int32_t card = run_container_cardinality(CAST_run(c)); + container_t *c1 = convert_to_bitset_or_array_container( + CAST_run(c), card, &type_after); + run_container_free(CAST_run(c)); + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); + } + } + } + return answer; +} + +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { + size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t cardinality = roaring_bitmap_get_cardinality(r); + uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); + if (portablesize < sizeasarray) { + buf[0] = CROARING_SERIALIZATION_CONTAINER; + return roaring_bitmap_portable_serialize(r, buf + 1) + 1; } else { - return _scalar_bitset_clear_list(words, card, list, length); + buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; + memcpy(buf + 1, &cardinality, sizeof(uint32_t)); + roaring_bitmap_to_uint32_array( + r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); + return 1 + (size_t)sizeasarray; } } -static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - if( croaring_avx2() ) { - return _asm_bitset_set_list_withcard(words, card, list, length); +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { + size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + + sizeof(uint32_t); + return portablesize < sizeasarray ? 
portablesize + 1 : (size_t)sizeasarray + 1; +} + +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { + return ra_portable_size_in_bytes(&r->high_low_container); +} + + +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (ans == NULL) { + return NULL; + } + size_t bytesread; + bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); + if(is_ok) assert(bytesread <= maxbytes); + roaring_bitmap_set_copy_on_write(ans, false); + if (!is_ok) { + roaring_free(ans); + return NULL; + } + return ans; +} + +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { + return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); +} + + +size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { + return ra_portable_deserialize_size(buf, maxbytes); +} + + +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, + char *buf) { + return ra_portable_serialize(&r->high_low_container, buf); +} + +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { + const char *bufaschar = (const char *)buf; + if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { + /* This looks like a compressed set of uint32_t elements */ + uint32_t card; + memcpy(&card, bufaschar + 1, sizeof(uint32_t)); + const uint32_t *elems = + (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; + } + roaring_bulk_context_t context; + + memset(&context, 0, sizeof(context)); + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); + } + return bitmap; + } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { + return roaring_bitmap_portable_deserialize(bufaschar + 1); + } else + return (NULL); +} + +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, + void *ptr) { + const roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) + if (!container_iterate(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, + iterator, ptr)) { + return false; + } + return true; +} + +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, + uint64_t high_bits, void *ptr) { + const roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) + if (!container_iterate64( + ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, iterator, + high_bits, ptr)) { + return false; + } + return true; +} + +/**** +* begin roaring_uint32_iterator_t +*****/ + +// Partially initializes the roaring iterator when it begins looking at +// a new container. 
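/*
 * Illustrative usage sketch, not taken from the patch itself: the forward
 * iteration pattern served by the functions in this section, assuming the
 * public roaring.h API; the helper name is example-only.
 */
#include <stdio.h>
#include "roaring.h"   /* include path depends on how the amalgamation is vendored */

static void example_print_all(const roaring_bitmap_t *r) {
    roaring_uint32_iterator_t *it = roaring_create_iterator(r);
    if (it == NULL) return;
    while (it->has_value) {            /* has_value/current_value are public fields */
        printf("%u\n", it->current_value);
        roaring_advance_uint32_iterator(it);
    }
    roaring_free_uint32_iterator(it);
}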
+static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { + newit->in_container_index = 0; + newit->run_index = 0; + newit->current_value = 0; + if (newit->container_index >= newit->parent->high_low_container.size || + newit->container_index < 0) { + newit->current_value = UINT32_MAX; + return (newit->has_value = false); + } + // assume not empty + newit->has_value = true; + // we precompute container, typecode and highbits so that successive + // iterators do not have to grab them from odd memory locations + // and have to worry about the (easily predicted) container_unwrap_shared + // call. + newit->container = + newit->parent->high_low_container.containers[newit->container_index]; + newit->typecode = + newit->parent->high_low_container.typecodes[newit->container_index]; + newit->highbits = + ((uint32_t) + newit->parent->high_low_container.keys[newit->container_index]) + << 16; + newit->container = + container_unwrap_shared(newit->container, &(newit->typecode)); + return newit->has_value; +} + +static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { + if (!iter_new_container_partial_init(newit)) + return newit->has_value; + + switch (newit->typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(newit->container); + + uint32_t wordindex = 0; + uint64_t word; + while ((word = bc->words[wordindex]) == 0) { + wordindex++; // advance + } + // here "word" is non-zero + newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); + newit->current_value = newit->highbits | newit->in_container_index; + break; } + + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(newit->container); + newit->current_value = newit->highbits | ac->array[0]; + break; } + + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(newit->container); + newit->current_value = newit->highbits | rc->runs[0].value; + break; } + + default: + // if this ever happens, bug! + assert(false); + } // switch (typecode) + return true; +} + +static bool loadlastvalue(roaring_uint32_iterator_t* newit) { + if (!iter_new_container_partial_init(newit)) + return newit->has_value; + + switch(newit->typecode) { + case BITSET_CONTAINER_TYPE: { + uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; + uint64_t word; + const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; + while ((word = bitset_container->words[wordindex]) == 0) + --wordindex; + + int num_leading_zeros = __builtin_clzll(word); + newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); + newit->current_value = newit->highbits | newit->in_container_index; + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t* array_container = (const array_container_t*)newit->container; + newit->in_container_index = array_container->cardinality - 1; + newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t* run_container = (const run_container_t*)newit->container; + newit->run_index = run_container->n_runs - 1; + const rle16_t* last_run = &run_container->runs[newit->run_index]; + newit->current_value = newit->highbits | (last_run->value + last_run->length); + break; + } + default: + // if this ever happens, bug! 
+ assert(false); + } + return true; +} + +// prerequesite: the value should be in range of the container +static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { + // Don't have to check return value because of prerequisite + iter_new_container_partial_init(newit); + uint16_t lb = val & 0xFFFF; + + switch (newit->typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(newit->container); + newit->in_container_index = + bitset_container_index_equalorlarger(bc, lb); + newit->current_value = newit->highbits | newit->in_container_index; + break; } + + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(newit->container); + newit->in_container_index = + array_container_index_equalorlarger(ac, lb); + newit->current_value = + newit->highbits | ac->array[newit->in_container_index]; + break; } + + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(newit->container); + newit->run_index = run_container_index_equalorlarger(rc, lb); + if (rc->runs[newit->run_index].value <= lb) { + newit->current_value = val; + } else { + newit->current_value = + newit->highbits | rc->runs[newit->run_index].value; + } + break; } + + default: + __builtin_unreachable(); + } + + return true; +} + +void roaring_init_iterator(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = 0; + newit->has_value = loadfirstvalue(newit); +} + +void roaring_init_iterator_last(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = newit->parent->high_low_container.size - 1; + newit->has_value = loadlastvalue(newit); +} + +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t)); + if (newit == NULL) return NULL; + roaring_init_iterator(r, newit); + return newit; +} + +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( + const roaring_uint32_iterator_t *it) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t)); + memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); + return newit; +} + +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { + uint16_t hb = val >> 16; + const int i = ra_get_index(& it->parent->high_low_container, hb); + if (i >= 0) { + uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); + uint16_t lb = val & 0xFFFF; + if(lowvalue < lb ) { + it->container_index = i+1; // will have to load first value of next container + } else {// the value is necessarily within the range of the container + it->container_index = i; + it->has_value = loadfirstvalue_largeorequal(it, val); + return it->has_value; + } } else { - return _scalar_bitset_set_list_withcard(words, card, list, length); + // there is no matching, so we are going for the next container + it->container_index = -i-1; } + it->has_value = loadfirstvalue(it); + return it->has_value; } -static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - if( croaring_avx2() ) { - _asm_bitset_set_list(words, list, length); + +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { + if (it->container_index >= it->parent->high_low_container.size) { + return (it->has_value = false); + } + 
if (it->container_index < 0) { + it->container_index = 0; + return (it->has_value = loadfirstvalue(it)); + } + + switch (it->typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(it->container); + it->in_container_index++; + + uint32_t wordindex = it->in_container_index / 64; + if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; + + uint64_t word = bc->words[wordindex] & + (UINT64_MAX << (it->in_container_index % 64)); + // next part could be optimized/simplified + while ((word == 0) && + (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { + wordindex++; + word = bc->words[wordindex]; + } + if (word != 0) { + it->in_container_index = wordindex * 64 + __builtin_ctzll(word); + it->current_value = it->highbits | it->in_container_index; + return (it->has_value = true); + } + break; } + + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(it->container); + it->in_container_index++; + if (it->in_container_index < ac->cardinality) { + it->current_value = + it->highbits | ac->array[it->in_container_index]; + return (it->has_value = true); + } + break; } + + case RUN_CONTAINER_TYPE: { + if(it->current_value == UINT32_MAX) { // avoid overflow to zero + return (it->has_value = false); + } + + const run_container_t* rc = const_CAST_run(it->container); + uint32_t limit = (it->highbits | (rc->runs[it->run_index].value + + rc->runs[it->run_index].length)); + if (++it->current_value <= limit) { + return (it->has_value = true); + } + + if (++it->run_index < rc->n_runs) { // Assume the run has a value + it->current_value = + it->highbits | rc->runs[it->run_index].value; + return (it->has_value = true); + } + break; + } + + default: + __builtin_unreachable(); + } + + // moving to next container + it->container_index++; + return (it->has_value = loadfirstvalue(it)); +} + +bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { + if (it->container_index < 0) { + return (it->has_value = false); + } + if (it->container_index >= it->parent->high_low_container.size) { + it->container_index = it->parent->high_low_container.size - 1; + return (it->has_value = loadlastvalue(it)); + } + + switch (it->typecode) { + case BITSET_CONTAINER_TYPE: { + if (--it->in_container_index < 0) + break; + + const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; + int32_t wordindex = it->in_container_index / 64; + uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); + + while (word == 0 && --wordindex >= 0) { + word = bitset_container->words[wordindex]; + } + if (word == 0) + break; + + int num_leading_zeros = __builtin_clzll(word); + it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); + it->current_value = it->highbits | it->in_container_index; + return (it->has_value = true); + } + case ARRAY_CONTAINER_TYPE: { + if (--it->in_container_index < 0) + break; + + const array_container_t* array_container = (const array_container_t*)it->container; + it->current_value = it->highbits | array_container->array[it->in_container_index]; + return (it->has_value = true); + } + case RUN_CONTAINER_TYPE: { + if(it->current_value == 0) + return (it->has_value = false); + + const run_container_t* run_container = (const run_container_t*)it->container; + if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { + return (it->has_value = true); + } + + if (--it->run_index < 0) + break; + + it->current_value = it->highbits | 
(run_container->runs[it->run_index].value + + run_container->runs[it->run_index].length); + return (it->has_value = true); + } + default: + // if this ever happens, bug! + assert(false); + } // switch (typecode) + + // moving to previous container + it->container_index--; + return (it->has_value = loadlastvalue(it)); +} + +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { + uint32_t ret = 0; + uint32_t num_values; + uint32_t wordindex; // used for bitsets + uint64_t word; // used for bitsets + const array_container_t* acont; //TODO remove + const run_container_t* rcont; //TODO remove + const bitset_container_t* bcont; //TODO remove + + while (it->has_value && ret < count) { + switch (it->typecode) { + case BITSET_CONTAINER_TYPE: + bcont = const_CAST_bitset(it->container); + wordindex = it->in_container_index / 64; + word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); + do { + while (word != 0 && ret < count) { + buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); + word = word & (word - 1); + buf++; + ret++; + } + while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { + wordindex++; + word = bcont->words[wordindex]; + } + } while (word != 0 && ret < count); + it->has_value = (word != 0); + if (it->has_value) { + it->in_container_index = wordindex * 64 + __builtin_ctzll(word); + it->current_value = it->highbits | it->in_container_index; + } + break; + case ARRAY_CONTAINER_TYPE: + acont = const_CAST_array(it->container); + num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); + for (uint32_t i = 0; i < num_values; i++) { + buf[i] = it->highbits | acont->array[it->in_container_index + i]; + } + buf += num_values; + ret += num_values; + it->in_container_index += num_values; + it->has_value = (it->in_container_index < acont->cardinality); + if (it->has_value) { + it->current_value = it->highbits | acont->array[it->in_container_index]; + } + break; + case RUN_CONTAINER_TYPE: + rcont = const_CAST_run(it->container); + //"in_run_index" name is misleading, read it as "max_value_in_current_run" + do { + uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); + num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); + for (uint32_t i = 0; i < num_values; i++) { + buf[i] = it->current_value + i; + } + it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 + buf += num_values; + ret += num_values; + + if (it->current_value > largest_run_value || it->current_value == 0) { + it->run_index++; + if (it->run_index < rcont->n_runs) { + it->current_value = it->highbits | rcont->runs[it->run_index].value; + } else { + it->has_value = false; + } + } + } while ((ret < count) && it->has_value); + break; + default: + assert(false); + } + if (it->has_value) { + assert(ret == count); + return ret; + } + it->container_index++; + it->has_value = loadfirstvalue(it); + } + return ret; +} + + + +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); } + +/**** +* end of roaring_uint32_iterator_t +*****/ + +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; + + if (ra1->size != ra2->size) { + return false; + } + for (int i = 0; i < ra1->size; ++i) { + if (ra1->keys[i] != ra2->keys[i]) { + return 
false; + } + } + for (int i = 0; i < ra1->size; ++i) { + bool areequal = container_equals(ra1->containers[i], + ra1->typecodes[i], + ra2->containers[i], + ra2->typecodes[i]); + if (!areequal) { + return false; + } + } + return true; +} + +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; + + const int length1 = ra1->size, + length2 = ra2->size; + + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(ra1, pos1); + const uint16_t s2 = ra_get_key_at_index(ra2, pos2); + + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1); + container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2); + if (!container_is_subset(c1, type1, c2, type2)) + return false; + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + return false; + } else { // s1 > s2 + pos2 = ra_advance_until(ra2, s1, pos2); + } + } + if (pos1 == length1) + return true; + else + return false; +} + +static void insert_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, i, &ctype_in); + flipped_container = + container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); + + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } } else { - _scalar_bitset_set_list(words, list, length); + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); } } -#else -static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; + +static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, i, &ctype_in); + flipped_container = container_inot_range( + container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); + // if a new container was created, the old one was already freed + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); + } + + } else { + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); } - return 
card; } -static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; +static void insert_fully_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, + uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, i, &ctype_in); + flipped_container = + container_not(container_to_flip, ctype_in, &ctype_out); + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } + } else { + flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); } - return card; } -static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; +static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, i, &ctype_in); + flipped_container = + container_inot(container_to_flip, ctype_in, &ctype_out); + + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); + } + + } else { + flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); + } +} + +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, + uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end) { + return roaring_bitmap_copy(x1); + } + if(range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); + } + + roaring_bitmap_t *ans = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); + + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; + uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); + const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; + + ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, + hb_start, is_cow(x1)); + if (hb_start == hb_end) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, + lb_start, 0xFFFF); + ++hb_start; // for the full containers. 
Can't wrap. + } + + if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block + + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + insert_fully_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb); + } + + // handle a partial final container + if (lb_end != 0xFFFF) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; + } + } + ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, + hb_end, is_cow(x1)); + return ans; +} + +void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end) { + return; // empty range + } + if(range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); + } + + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; + uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); + const uint16_t lb_end = (uint16_t)(range_end - 1); + + if (hb_start == hb_end) { + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + 0xFFFF); + ++hb_start; // for the full containers. Can't wrap. + } + + if (lb_end != 0xFFFF) --hb_end; + + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + inplace_fully_flip_container(&x1->high_low_container, hb); + } + // handle a partial final container + if (lb_end != 0xFFFF) { + inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; + } + } +} + +static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) { + int size = ra_get_size(ra); + if (size == 0 || ra_get_key_at_index(ra, size-1) != k) { + // No merge. + ra_append(ra, k, c, t); + return; + } + + uint8_t last_t, new_t; + container_t *last_c, *new_c; + + // NOTE: we don't need to unwrap here, since we added last_c ourselves + // we have the certainty it's not a shared container. + // The same applies to c, as it's the result of calling container_offset. + last_c = ra_get_container_at_index(ra, size-1, &last_t); + new_c = container_ior(last_c, last_t, c, t, &new_t); + + ra_set_container_at_index(ra, size-1, new_c, new_t); + + // Comparison of pointers of different origin is UB (or so claim some compiler + // makers), so we compare their bit representation only. + if ((uintptr_t)last_c != (uintptr_t)new_c) { + container_free(last_c, last_t); + } + container_free(c, t); +} + +// roaring_bitmap_add_offset adds the value 'offset' to each and every value in +// a bitmap, generating a new bitmap in the process. If offset + element is +// outside of the range [0,2^32), that the element will be dropped. +// We need "offset" to be 64 bits because we want to support values +// between -0xFFFFFFFF up to +0xFFFFFFFF. 
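/*
 * Illustrative usage sketch, not taken from the patch itself: assumes the
 * public roaring.h API (roaring_bitmap_add/_contains/_free are defined
 * elsewhere in the amalgamation) and an example-only helper name. It shows
 * the behaviour described above for the routine defined below: values shifted
 * outside [0, 2^32) are silently dropped.
 */
#include <assert.h>
#include <stdint.h>
#include "roaring.h"   /* include path depends on how the amalgamation is vendored */

static void example_add_offset(void) {
    roaring_bitmap_t *r = roaring_bitmap_create();
    roaring_bitmap_add(r, 10);
    roaring_bitmap_add(r, UINT32_MAX);

    roaring_bitmap_t *shifted = roaring_bitmap_add_offset(r, 100);
    /* 10 -> 110; UINT32_MAX + 100 falls outside the 32-bit range and is dropped */
    assert(roaring_bitmap_get_cardinality(shifted) == 1);
    assert(roaring_bitmap_contains(shifted, 110));

    roaring_bitmap_t *back = roaring_bitmap_add_offset(shifted, -100);
    assert(roaring_bitmap_contains(back, 10));

    roaring_bitmap_free(back);
    roaring_bitmap_free(shifted);
    roaring_bitmap_free(r);
}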
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset) { + roaring_bitmap_t *answer; + roaring_array_t *ans_ra; + int64_t container_offset; + uint16_t in_offset; + + const roaring_array_t *bm_ra = &bm->high_low_container; + int length = bm_ra->size; + + if (offset == 0) { + return roaring_bitmap_copy(bm); + } + + container_offset = offset >> 16; + in_offset = (uint16_t)(offset - container_offset * (1 << 16)); + + answer = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(answer, is_cow(bm)); + + ans_ra = &answer->high_low_container; + + if (in_offset == 0) { + ans_ra = &answer->high_low_container; + + for (int i = 0, j = 0; i < length; ++i) { + int64_t key = ra_get_key_at_index(bm_ra, i); + key += container_offset; + + if (key < 0 || key >= (1 << 16)) { + continue; + } + + ra_append_copy(ans_ra, bm_ra, i, false); + ans_ra->keys[j++] = key; + } + + return answer; + } + + uint8_t t; + const container_t *c; + container_t *lo, *hi, **lo_ptr, **hi_ptr; + int64_t k; + + for (int i = 0; i < length; ++i) { + lo = hi = NULL; + lo_ptr = hi_ptr = NULL; + + k = ra_get_key_at_index(bm_ra, i)+container_offset; + if (k >= 0 && k < (1 << 16)) { + lo_ptr = &lo; + } + if (k+1 >= 0 && k+1 < (1 << 16)) { + hi_ptr = &hi; + } + if (lo_ptr == NULL && hi_ptr == NULL) { + continue; + } + + c = ra_get_container_at_index(bm_ra, i, &t); + c = container_unwrap_shared(c, &t); + + container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); + if (lo != NULL) { + offset_append_with_merge(ans_ra, k, lo, t); + } + if (hi != NULL) { + ra_append(ans_ra, k+1, hi, t); + } } + + return answer; } +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c; + if (bitsetconversion && + (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && + (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE) + ){ + container_t *newc1 = + container_mutable_unwrap_shared(c1, &type1); + newc1 = container_to_bitset(newc1, type1); + type1 = BITSET_CONTAINER_TYPE; + c = container_lazy_ior(newc1, type1, c2, type2, + &result_type); + if (c != newc1) { // should not happen + container_free(newc1, type1); + } + } else { + c = container_lazy_or(c1, type1, c2, type2, &result_type); + } + // since we assume that the initial containers are non-empty, + // the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index( + 
&x1->high_low_container, pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} + +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + if (!container_is_full(c1, type1)) { + if ((bitsetconversion == false) || + (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE) + ){ + c1 = get_writable_copy_if_shared(c1, &type1); + } else { + // convert to bitset + container_t *old_c1 = c1; + uint8_t old_type1 = type1; + c1 = container_mutable_unwrap_shared(c1, &type1); + c1 = container_to_bitset(c1, type1); + container_free(old_c1, old_type1); + type1 = BITSET_CONTAINER_TYPE; + } + + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_lazy_ior(c1, type1, c2, type2, + &result_type); + + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } + + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + // container_t *c2_clone = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, 
is_cow(x2)); + } +} + +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + container_t *c = container_lazy_xor( + c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); + } + + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} + +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared container. 
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_lazy_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
+ roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i) {
+ const uint8_t old_type = ra->typecodes[i];
+ container_t *old_c = ra->containers[i];
+ uint8_t new_type = old_type;
+ container_t *new_c = container_repair_after_lazy(old_c, &new_type);
+ ra->containers[i] = new_c;
+ ra->typecodes[i] = new_type;
+ }
+}
+
+
+
+/**
+* roaring_bitmap_rank returns the number of integers that are smaller than or equal
+* to x.
+*/
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
+ uint64_t size = 0;
+ uint32_t xhigh = x >> 16;
+ for (int i = 0; i < bm->high_low_container.size; i++) {
+ uint32_t key = bm->high_low_container.keys[i];
+ if (xhigh > key) {
+ size +=
+ container_get_cardinality(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i]);
+ } else if (xhigh == key) {
+ return size + container_rank(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i],
+ x & 0xFFFF);
+ } else {
+ return size;
+ }
+ }
+ return size;
+}
+
+/**
+* roaring_bitmap_minimum returns the smallest value in the set.
+* Returns UINT32_MAX if the set is empty.
+*/
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *c = bm->high_low_container.containers[0];
+ uint8_t type = bm->high_low_container.typecodes[0];
+ uint32_t key = bm->high_low_container.keys[0];
+ uint32_t lowvalue = container_minimum(c, type);
+ return lowvalue | (key << 16);
+ }
+ return UINT32_MAX;
+}
+
+/**
+* roaring_bitmap_maximum returns the greatest value in the set.
+* Returns 0 if the set is empty.
+*/ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + container_t *container = + bm->high_low_container.containers[bm->high_low_container.size - 1]; + uint8_t typecode = + bm->high_low_container.typecodes[bm->high_low_container.size - 1]; + uint32_t key = + bm->high_low_container.keys[bm->high_low_container.size - 1]; + uint32_t lowvalue = container_maximum(container, typecode); + return lowvalue | (key << 16); + } + return 0; +} + +bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, + uint32_t *element) { + container_t *container; + uint8_t typecode; + uint16_t key; + uint32_t start_rank = 0; + int i = 0; + bool valid = false; + while (!valid && i < bm->high_low_container.size) { + container = bm->high_low_container.containers[i]; + typecode = bm->high_low_container.typecodes[i]; + valid = + container_select(container, typecode, &start_rank, rank, element); + i++; + } + + if (valid) { + key = bm->high_low_container.keys[i - 1]; + *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed + return true; + } else + return false; +} + +bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); + const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); + + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + if (container_intersect(c1, type1, c2, type2)) + return true; + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); + } + } + return answer != 0; +} + +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, + uint64_t x, uint64_t y) { + if (x >= y) { + // Empty range. + return false; + } + roaring_uint32_iterator_t it; + roaring_init_iterator(bm, &it); + if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) { + // No values above x. + return false; + } + if (it.current_value >= y) { + // No values below y. 
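+ // (the smallest value at or above x is already >= y, so the range [x, y) is empty)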
+ return false;
+ }
+ return true;
+}
+
+
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint64_t answer = 0;
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ answer += container_and_cardinality(c1, type1, c2, type2);
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer;
+}
+
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return (double)inter / (double)(c1 + c2 - inter);
+}
+
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - inter;
+}
+
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 - inter;
+}
+
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - 2 * inter;
+}
+
+
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ /*
+ * the next function call involves a binary search and lots of branching.
+ */
+ int32_t i = ra_get_index(&r->high_low_container, hb);
+ if (i < 0) return false;
+
+ uint8_t typecode;
+ // next call ought to be cheap
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ // rest might be a tad expensive, possibly involving another round of binary search
+ return container_contains(container, val & 0xFFFF, typecode);
+}
+
+
+/**
+ * Check whether a range of values from range_start (included) to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+ if (range_start >= range_end) return true; // an empty range is always contained!
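+ // Split [range_start, range_end) into a partial first 16-bit chunk, zero or
+ // more full middle chunks, and a partial last chunk; every middle container
+ // must be full for the whole range to be contained.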
+ if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
+ uint16_t hb_rs = (uint16_t)(range_start >> 16);
+ uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
+ const int32_t span = hb_re - hb_rs;
+ const int32_t hlc_sz = ra_get_size(&r->high_low_container);
+ if (hlc_sz < span + 1) {
+ return false;
+ }
+ int32_t is = ra_get_index(&r->high_low_container, hb_rs);
+ int32_t ie = ra_get_index(&r->high_low_container, hb_re);
+ ie = (ie < 0 ? -ie - 1 : ie);
+ if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+ return false;
+ }
+ const uint32_t lb_rs = range_start & 0xFFFF;
+ const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
+ uint8_t type;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, is,
+ &type);
+ if (hb_rs == hb_re) {
+ return container_contains_range(c, lb_rs, lb_re, type);
+ }
+ if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
+ return false;
+ }
+ c = ra_get_container_at_index(&r->high_low_container, ie, &type);
+ if (!container_contains_range(c, 0, lb_re, type)) {
+ return false;
+ }
+ for (int32_t i = is + 1; i < ie; ++i) {
+ c = ra_get_container_at_index(&r->high_low_container, i, &type);
+ if (!container_is_full(c, type) ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ return (roaring_bitmap_get_cardinality(r2) >
+ roaring_bitmap_get_cardinality(r1) &&
+ roaring_bitmap_is_subset(r1, r2));
+}
+
+
+/*
+ * FROZEN SERIALIZATION FORMAT DESCRIPTION
+ *
+ * -- (beginning must be aligned by 32 bytes) --
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
+ * <run_data> rle16_t[total number of rle elements in all run containers]
+ * <array_data> uint16_t[total number of array elements in all array containers]
+ * <keys> uint16_t[num_containers]
+ * <counts> uint16_t[num_containers]
+ * <typecodes> uint8_t[num_containers]
+ * <header> uint32_t
+ *
+ * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
+ * and the number of containers (17 bits).
+ *
+ * <counts> stores the number of elements for every container.
+ * Its meaning depends on container type.
+ * For array and bitset containers, this value is the container cardinality minus one.
+ * For run containers, it is the number of rle16_t elements (n_runs).
+ *
+ * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
+ * all containers of respective type.
+ *
+ * <*_data> and <keys> are kept close together because they are not accessed
+ * during deserialization. This may reduce IO in case of large mmapped bitmaps.
+ * All members have their native alignments during deserialization except <header>,
+ * which is not guaranteed to be aligned by 4 bytes.
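+ *
+ * For example, a frozen bitmap with 3 containers ends with
+ * header = (3 << 15) | FROZEN_COOKIE in its last 4 bytes;
+ * roaring_bitmap_frozen_view() reads this field to recover the container count.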
+ */ + +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { + const roaring_array_t *ra = &rb->high_low_container; + size_t num_bytes = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + num_bytes += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + num_bytes += ac->cardinality * sizeof(uint16_t); + break; + } + default: + __builtin_unreachable(); + } + } + num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes + num_bytes += 4; // header + return num_bytes; +} + +inline static void *arena_alloc(char **arena, size_t num_bytes) { + char *res = *arena; + *arena += num_bytes; + return res; +} + +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { + /* + * Note: we do not require user to supply a specifically aligned buffer. + * Thus we have to use memcpy() everywhere. + */ + + const roaring_array_t *ra = &rb->high_low_container; + + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + run_zone_size += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + array_zone_size += ac->cardinality * sizeof(uint16_t); + break; + } + default: + __builtin_unreachable(); + } + } + + uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); + rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); + uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); + uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); + uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); + uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); + uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); + + for (int32_t i = 0; i < ra->size; i++) { + uint16_t count; + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = + const_CAST_bitset(ra->containers[i]); + memcpy(bitset_zone, bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { + count = bc->cardinality - 1; + } else { + count = bitset_container_compute_cardinality(bc) - 1; + } + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + size_t num_bytes = rc->n_runs * sizeof(rle16_t); + memcpy(run_zone, rc->runs, num_bytes); + run_zone += rc->n_runs; + count = rc->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + size_t num_bytes = ac->cardinality * sizeof(uint16_t); + memcpy(array_zone, ac->array, num_bytes); + array_zone += ac->cardinality; + count = ac->cardinality - 1; + break; + } + default: + __builtin_unreachable(); + } + memcpy(&count_zone[i], &count, 2); + } + memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); + 
memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); + uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; + memcpy(header_zone, &header, 4); +} + +const roaring_bitmap_t * +roaring_bitmap_frozen_view(const char *buf, size_t length) { + if ((uintptr_t)buf % 32 != 0) { + return NULL; + } + + // cookie and num_containers + if (length < 4) { + return NULL; + } + uint32_t header; + memcpy(&header, buf + length - 4, 4); // header may be misaligned + if ((header & 0x7FFF) != FROZEN_COOKIE) { + return NULL; + } + int32_t num_containers = (header >> 15); + + // typecodes, counts and keys + if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { + return NULL; + } + uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); + uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); + uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); + + // {bitset,array,run}_zone + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: + num_bitset_containers++; + bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + case RUN_CONTAINER_TYPE: + num_run_containers++; + run_zone_size += counts[i] * sizeof(rle16_t); + break; + case ARRAY_CONTAINER_TYPE: + num_array_containers++; + array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); + break; + default: + return NULL; + } + } + if (length != bitset_zone_size + run_zone_size + array_zone_size + + 5 * num_containers + 4) { + return NULL; + } + uint64_t *bitset_zone = (uint64_t*) (buf); + rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); + uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); + + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t*); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; + } + + roaring_bitmap_t *rb = (roaring_bitmap_t *) + arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.keys = (uint16_t *)keys; + rb->high_low_container.typecodes = (uint8_t *)typecodes; + rb->high_low_container.containers = + (container_t **)arena_alloc(&arena, + sizeof(container_t*) * num_containers); + // Ensure offset of high_low_container.containers is known distance used in + // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the + // only allocation that precedes high_low_container.containers. If this is + // changed (new allocation or changed order), this offset will also need to + // be changed in the C++ wrapper. 
+ assert(rb == + (roaring_bitmap_t *)((char *)rb->high_low_container.containers - + sizeof(roaring_bitmap_t))); + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *) + arena_alloc(&arena, sizeof(bitset_container_t)); + bitset->words = bitset_zone; + bitset->cardinality = counts[i] + UINT32_C(1); + rb->high_low_container.containers[i] = bitset; + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *) + arena_alloc(&arena, sizeof(run_container_t)); + run->capacity = counts[i]; + run->n_runs = counts[i]; + run->runs = run_zone; + rb->high_low_container.containers[i] = run; + run_zone += run->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *) + arena_alloc(&arena, sizeof(array_container_t)); + array->capacity = counts[i] + UINT32_C(1); + array->cardinality = counts[i] + UINT32_C(1); + array->array = array_zone; + rb->high_low_container.containers[i] = array; + array_zone += counts[i] + UINT32_C(1); + break; + } + default: + roaring_free(arena); + return NULL; + } + } + + return rb; +} + +ALLOW_UNALIGNED +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { + char *start_of_buf = (char *) buf; + uint32_t cookie; + int32_t num_containers; + uint16_t *descriptive_headers; + uint32_t *offset_headers = NULL; + const char *run_flag_bitset = NULL; + bool hasrun = false; + + // deserialize cookie + memcpy(&cookie, buf, sizeof(uint32_t)); + buf += sizeof(uint32_t); + if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { + memcpy(&num_containers, buf, sizeof(int32_t)); + buf += sizeof(int32_t); + descriptive_headers = (uint16_t *) buf; + buf += num_containers * 2 * sizeof(uint16_t); + offset_headers = (uint32_t *) buf; + buf += num_containers * sizeof(uint32_t); + } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { + num_containers = (cookie >> 16) + 1; + hasrun = true; + int32_t run_flag_bitset_size = (num_containers + 7) / 8; + run_flag_bitset = buf; + buf += run_flag_bitset_size; + descriptive_headers = (uint16_t *) buf; + buf += num_containers * 2 * sizeof(uint16_t); + if(num_containers >= NO_OFFSET_THRESHOLD) { + offset_headers = (uint32_t *) buf; + buf += num_containers * sizeof(uint32_t); + } + } else { + return NULL; + } + + // calculate total size for allocation + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); + uint32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + + if (isbitmap) { + num_bitset_containers++; + } else if (isrun) { + num_run_containers++; + } else { + num_array_containers++; + } + } + + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t*); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + alloc_size += num_containers * sizeof(uint16_t); // keys + alloc_size += num_containers * sizeof(uint8_t); // typecodes + + // allocate bitmap and construct containers + char *arena = (char 
*)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; + } + + roaring_bitmap_t *rb = (roaring_bitmap_t *) + arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.containers = + (container_t **)arena_alloc(&arena, + sizeof(container_t*) * num_containers); + + uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); + uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); + + rb->high_low_container.keys = keys; + rb->high_low_container.typecodes = typecodes; + + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); + int32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + + keys[i] = descriptive_headers[2*i]; + + if (isbitmap) { + typecodes[i] = BITSET_CONTAINER_TYPE; + bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t)); + c->cardinality = cardinality; + if(offset_headers != NULL) { + c->words = (uint64_t *) (start_of_buf + offset_headers[i]); + } else { + c->words = (uint64_t *) buf; + buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + rb->high_low_container.containers[i] = c; + } else if (isrun) { + typecodes[i] = RUN_CONTAINER_TYPE; + run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); + c->capacity = cardinality; + uint16_t n_runs; + if(offset_headers != NULL) { + memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t)); + c->n_runs = n_runs; + c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t)); + } else { + memcpy(&n_runs, buf, sizeof(uint16_t)); + c->n_runs = n_runs; + buf += sizeof(uint16_t); + c->runs = (rle16_t *) buf; + buf += c->n_runs * sizeof(rle16_t); + } + rb->high_low_container.containers[i] = c; + } else { + typecodes[i] = ARRAY_CONTAINER_TYPE; + array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t)); + c->cardinality = cardinality; + c->capacity = cardinality; + if(offset_headers != NULL) { + c->array = (uint16_t *) (start_of_buf + offset_headers[i]); + } else { + c->array = (uint16_t *) buf; + buf += cardinality * sizeof(uint16_t); + } + rb->high_low_container.containers[i] = c; + } + } + + return rb; +} + + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { #endif +/* end file src/roaring.c */ +/* begin file src/roaring_array.c */ +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ -static uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? 
- card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - words[offset] = newload; - list++; +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +// Convention: [0,ra->size) all elements are initialized +// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing + +extern inline int32_t ra_get_size(const roaring_array_t *ra); +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); + +extern inline container_t *ra_get_container_at_index( + const roaring_array_t *ra, uint16_t i, + uint8_t *typecode); + +extern inline void ra_unshare_container_at_index(roaring_array_t *ra, + uint16_t i); + +extern inline void ra_replace_key_and_container_at_index( + roaring_array_t *ra, int32_t i, uint16_t key, + container_t *c, uint8_t typecode); + +extern inline void ra_set_container_at_index( + const roaring_array_t *ra, int32_t i, + container_t *c, uint8_t typecode); + +static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { + // + // Note: not implemented using C's realloc(), because the memory layout is + // Struct-of-Arrays vs. Array-of-Structs: + // https://github.com/RoaringBitmap/CRoaring/issues/256 + + if ( new_capacity == 0 ) { + roaring_free(ra->containers); + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; + ra->allocation_size = 0; + return true; } - return card; + const size_t memoryneeded = new_capacity * ( + sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + void *bigalloc = roaring_malloc(memoryneeded); + if (!bigalloc) return false; + void *oldbigalloc = ra->containers; + container_t **newcontainers = (container_t **)bigalloc; + uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); + uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); + assert((char *)(newtypecodes + new_capacity) == + (char *)bigalloc + memoryneeded); + if(ra->size > 0) { + memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); + memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); + memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); + } + ra->containers = newcontainers; + ra->keys = newkeys; + ra->typecodes = newtypecodes; + ra->allocation_size = new_capacity; + roaring_free(oldbigalloc); + return true; } -static void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - words[offset] = newload; - list++; +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { + if (!new_ra) return false; + ra_init(new_ra); + + if (cap > INT32_MAX) { return false; } + + if(cap > 0) { + void *bigalloc = roaring_malloc(cap * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); + if( bigalloc == NULL ) return false; + new_ra->containers = (container_t **)bigalloc; + new_ra->keys = (uint16_t *)(new_ra->containers + cap); + new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); + // Narrowing is safe because of above check + new_ra->allocation_size = (int32_t)cap; } + return true; +} + +int ra_shrink_to_fit(roaring_array_t *ra) { + int savings = (ra->allocation_size - ra->size) * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + if (!realloc_array(ra, ra->size)) { + return 0; + } + ra->allocation_size = ra->size; + return savings; +} + +void 
ra_init(roaring_array_t *new_ra) { + if (!new_ra) { return; } + new_ra->keys = NULL; + new_ra->containers = NULL; + new_ra->typecodes = NULL; + + new_ra->allocation_size = 0; + new_ra->size = 0; + new_ra->flags = 0; +} + +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write) { + ra_clear_containers(dest); // we are going to overwrite them + if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size + dest->size = 0; // <--- This is important. + return true; // output was just cleared, so they match + } + if (dest->allocation_size < source->size) { + if (!realloc_array(dest, source->size)) { + return false; + } + } + dest->size = source->size; + memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); + // we go through the containers, turning them into shared containers... + if (copy_on_write) { + for (int32_t i = 0; i < dest->size; ++i) { + source->containers[i] = get_copy_of_container( + source->containers[i], &source->typecodes[i], copy_on_write); + } + // we do a shallow copy to the other bitmap + memcpy(dest->containers, source->containers, + dest->size * sizeof(container_t *)); + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } else { + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + for (int32_t i = 0; i < dest->size; i++) { + dest->containers[i] = + container_clone(source->containers[i], source->typecodes[i]); + if (dest->containers[i] == NULL) { + for (int32_t j = 0; j < i; j++) { + container_free(dest->containers[j], dest->typecodes[j]); + } + ra_clear_without_containers(dest); + return false; + } + } + } + return true; +} + +void ra_clear_containers(roaring_array_t *ra) { + for (int32_t i = 0; i < ra->size; ++i) { + container_free(ra->containers[i], ra->typecodes[i]); + } +} + +void ra_reset(roaring_array_t *ra) { + ra_clear_containers(ra); + ra->size = 0; + ra_shrink_to_fit(ra); +} + +void ra_clear_without_containers(roaring_array_t *ra) { + roaring_free(ra->containers); // keys and typecodes are allocated with containers + ra->size = 0; + ra->allocation_size = 0; + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; +} + +void ra_clear(roaring_array_t *ra) { + ra_clear_containers(ra); + ra_clear_without_containers(ra); +} + +bool extend_array(roaring_array_t *ra, int32_t k) { + int32_t desired_size = ra->size + k; + const int32_t max_containers = 65536; + assert(desired_size <= max_containers); + if (desired_size > ra->allocation_size) { + int32_t new_capacity = + (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; + if (new_capacity > max_containers) { + new_capacity = max_containers; + } + + return realloc_array(ra, new_capacity); + } + return true; +} + +void ra_append( + roaring_array_t *ra, uint16_t key, + container_t *c, uint8_t typecode +){ + extend_array(ra, 1); + const int32_t pos = ra->size; + + ra->keys[pos] = key; + ra->containers[pos] = c; + ra->typecodes[pos] = typecode; + ra->size++; +} + +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t index, bool copy_on_write) { + extend_array(ra, 1); + const int32_t pos = ra->size; + + // old contents is junk not needing freeing + ra->keys[pos] = sa->keys[index]; + // the shared container will be in two bitmaps + if (copy_on_write) { + sa->containers[index] = get_copy_of_container( + sa->containers[index], &sa->typecodes[index], copy_on_write); + ra->containers[pos] = sa->containers[index]; + ra->typecodes[pos] = sa->typecodes[index]; + } else { + ra->containers[pos] = + container_clone(sa->containers[index], sa->typecodes[index]); + ra->typecodes[pos] = sa->typecodes[index]; + } + ra->size++; +} + +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t stopping_key, bool copy_on_write) { + for (int32_t i = 0; i < sa->size; ++i) { + if (sa->keys[i] >= stopping_key) break; + ra_append_copy(ra, sa, i, copy_on_write); + } +} + +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; + } +} + +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t before_start, bool copy_on_write) { + int start_location = ra_get_index(sa, before_start); + if (start_location >= 0) + ++start_location; + else + start_location = -start_location - 1; + ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); +} + +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + + ra->keys[pos] = sa->keys[i]; + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + ra->size++; + } +} + +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; + } +} + +container_t *ra_get_container( + roaring_array_t *ra, uint16_t x, uint8_t *typecode +){ + int i 
= binarySearch(ra->keys, (int32_t)ra->size, x); + if (i < 0) return NULL; + *typecode = ra->typecodes[i]; + return ra->containers[i]; +} + +extern inline container_t *ra_get_container_at_index( + const roaring_array_t *ra, uint16_t i, + uint8_t *typecode); + +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, + uint16_t i); + +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); + +extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, + int32_t pos); + +// everything skipped over is freed +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { + while (pos < ra->size && ra->keys[pos] < x) { + container_free(ra->containers[pos], ra->typecodes[pos]); + ++pos; + } + return pos; +} + +void ra_insert_new_key_value_at( + roaring_array_t *ra, int32_t i, uint16_t key, + container_t *c, uint8_t typecode +){ + extend_array(ra, 1); + // May be an optimization opportunity with DIY memmove + memmove(&(ra->keys[i + 1]), &(ra->keys[i]), + sizeof(uint16_t) * (ra->size - i)); + memmove(&(ra->containers[i + 1]), &(ra->containers[i]), + sizeof(container_t *) * (ra->size - i)); + memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), + sizeof(uint8_t) * (ra->size - i)); + ra->keys[i] = key; + ra->containers[i] = c; + ra->typecodes[i] = typecode; + ra->size++; +} + +// note: Java routine set things to 0, enabling GC. +// Java called it "resize" but it was always used to downsize. +// Allowing upsize would break the conventions about +// valid containers below ra->size. + +void ra_downsize(roaring_array_t *ra, int32_t new_length) { + assert(new_length <= ra->size); + ra->size = new_length; +} + +void ra_remove_at_index(roaring_array_t *ra, int32_t i) { + memmove(&(ra->containers[i]), &(ra->containers[i + 1]), + sizeof(container_t *) * (ra->size - i - 1)); + memmove(&(ra->keys[i]), &(ra->keys[i + 1]), + sizeof(uint16_t) * (ra->size - i - 1)); + memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), + sizeof(uint8_t) * (ra->size - i - 1)); + ra->size--; +} + +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { + container_free(ra->containers[i], ra->typecodes[i]); + ra_remove_at_index(ra, i); +} + +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. In use it should be followed by a call to +// downsize. 
+// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, + uint32_t new_begin) { + assert(begin <= end); + assert(new_begin < begin); + + const int range = end - begin; + + // We ensure to previously have freed overwritten containers + // that are not copied elsewhere + + memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), + sizeof(container_t *) * range); + memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), + sizeof(uint16_t) * range); + memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), + sizeof(uint8_t) * range); +} + +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { + if (distance > 0) { + extend_array(ra, distance); + } + int32_t srcpos = ra->size - count; + int32_t dstpos = srcpos + distance; + memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), + sizeof(uint16_t) * count); + memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), + sizeof(container_t *) * count); + memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), + sizeof(uint8_t) * count); + ra->size += distance; +} + + +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { + size_t ctr = 0; + for (int32_t i = 0; i < ra->size; ++i) { + int num_added = container_to_uint32_array( + ans + ctr, ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + ctr += num_added; + } +} + +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { + size_t ctr = 0; + size_t dtr = 0; + + size_t t_limit = 0; + + bool first = false; + size_t first_skip = 0; + + uint32_t *t_ans = NULL; + size_t cur_len = 0; + + for (int i = 0; i < ra->size; ++i) { + + const container_t *c = container_unwrap_shared( + ra->containers[i], &ra->typecodes[i]); + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + t_limit = (const_CAST_bitset(c))->cardinality; + break; + case ARRAY_CONTAINER_TYPE: + t_limit = (const_CAST_array(c))->cardinality; + break; + case RUN_CONTAINER_TYPE: + t_limit = run_container_cardinality(const_CAST_run(c)); + break; + } + if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ + if (!first){ + //first_skip = t_limit - (ctr + t_limit - offset); + first_skip = offset - ctr; + first = true; + t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * (first_skip + limit)); + if(t_ans == NULL) { + return false; + } + memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; + cur_len = first_skip + limit; + } + if (dtr + t_limit > cur_len){ + uint32_t * append_ans = (uint32_t *)roaring_malloc(sizeof(*append_ans) * (cur_len + t_limit)); + if(append_ans == NULL) { + if(t_ans != NULL) roaring_free(t_ans); + return false; + } + memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); + cur_len = cur_len + t_limit; + memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); + roaring_free(t_ans); + t_ans = append_ans; + } + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + container_to_uint32_array( + t_ans + dtr, + const_CAST_bitset(c), ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case ARRAY_CONTAINER_TYPE: + container_to_uint32_array( + t_ans + dtr, + const_CAST_array(c), ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case RUN_CONTAINER_TYPE: + container_to_uint32_array( + t_ans + dtr, + const_CAST_run(c), ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + } + dtr += t_limit; + } + ctr += t_limit; + if (dtr-first_skip >= limit) break; + } + if(t_ans != NULL) { + memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); + free(t_ans); + } + 
return true; +} + +bool ra_has_run_container(const roaring_array_t *ra) { + for (int32_t k = 0; k < ra->size; ++k) { + if (get_container_type(ra->containers[k], ra->typecodes[k]) == + RUN_CONTAINER_TYPE) + return true; + } + return false; +} + +uint32_t ra_portable_header_size(const roaring_array_t *ra) { + if (ra_has_run_container(ra)) { + if (ra->size < + NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets + return 4 + (ra->size + 7) / 8 + 4 * ra->size; + } + return 4 + (ra->size + 7) / 8 + + 8 * ra->size; // - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * ra->size; + } +} + +size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { + size_t count = ra_portable_header_size(ra); + + for (int32_t k = 0; k < ra->size; ++k) { + count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + return count; +} + +// This function is endian-sensitive. +size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { + char *initbuf = buf; + uint32_t startOffset = 0; + bool hasrun = ra_has_run_container(ra); + if (hasrun) { + uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + uint32_t s = (ra->size + 7) / 8; + uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); + assert(bitmapOfRunContainers != NULL); // todo: handle + for (int32_t i = 0; i < ra->size; ++i) { + if (get_container_type(ra->containers[i], ra->typecodes[i]) == + RUN_CONTAINER_TYPE) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); + } + } + memcpy(buf, bitmapOfRunContainers, s); + buf += s; + roaring_free(bitmapOfRunContainers); + if (ra->size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * ra->size + s; + } else { + startOffset = 4 + 8 * ra->size + s; + } + } else { // backwards compatibility + uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; + + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + memcpy(buf, &ra->size, sizeof(ra->size)); + buf += sizeof(ra->size); + + startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; + } + for (int32_t k = 0; k < ra->size; ++k) { + memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); + buf += sizeof(ra->keys[k]); + // get_cardinality returns a value in [1,1<<16], subtracting one + // we get [0,1<<16 - 1] which fits in 16 bits + uint16_t card = (uint16_t)( + container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); + memcpy(buf, &card, sizeof(card)); + buf += sizeof(card); + } + if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { + // writing the containers offsets + for (int32_t k = 0; k < ra->size; k++) { + memcpy(buf, &startOffset, sizeof(startOffset)); + buf += sizeof(startOffset); + startOffset = + startOffset + + container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + } + for (int32_t k = 0; k < ra->size; ++k) { + buf += container_write(ra->containers[k], ra->typecodes[k], buf); + } + return buf - initbuf; +} + +// Quickly checks whether there is a serialized bitmap at the pointer, +// not exceeding size "maxbytes" in bytes. This function does not allocate +// memory dynamically. +// +// This function returns 0 if and only if no valid bitmap is found. +// Otherwise, it returns how many bytes are occupied. 
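+//
+// It can be used to validate an untrusted buffer before handing it to
+// ra_portable_deserialize() below.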
+// +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { + size_t bytestotal = sizeof(int32_t);// for cookie + if(bytestotal > maxbytes) return 0; + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + return 0; + } + int32_t size; + + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + bytestotal += sizeof(int32_t); + if(bytestotal > maxbytes) return 0; + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + } + if (size > (1<<16)) { + return 0; // logically impossible + } + char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + bytestotal += s; + if(bytestotal > maxbytes) return 0; + bitmapOfRunContainers = (char *)buf; + buf += s; + } + bytestotal += size * 2 * sizeof(uint16_t); + if(bytestotal > maxbytes) return 0; + uint16_t *keyscards = (uint16_t *)buf; + buf += size * 2 * sizeof(uint16_t); + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + // skipping the offsets + bytestotal += size * 4; + if(bytestotal > maxbytes) return 0; + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } else if (isrun) { + bytestotal += sizeof(uint16_t); + if(bytestotal > maxbytes) return 0; + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + buf += sizeof(uint16_t); + size_t containersize = n_runs * sizeof(rle16_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } else { + size_t containersize = thiscard * sizeof(uint16_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } + } + return bytestotal; +} + +// this function populates answer from the content of buf (reading up to maxbytes bytes). +// The function returns false if a properly serialized bitmap cannot be found. +// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. +// +// This function is endian-sensitive. +bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { + *readbytes = sizeof(int32_t);// for cookie + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); + return false; + } + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + fprintf(stderr, "I failed to find one of the right cookies. 
Found %" PRIu32 "\n", + cookie); + return false; + } + int32_t size; + + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + *readbytes += sizeof(int32_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); + return false; + } + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + } + if (size < 0) { + fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n", + size); + return false; // logically impossible + } + if (size > (1<<16)) { + fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", + size); + return false; // logically impossible + } + const char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + *readbytes += s; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); + return false; + } + bitmapOfRunContainers = buf; + buf += s; + } + uint16_t *keyscards = (uint16_t *)buf; + + *readbytes += size * 2 * sizeof(uint16_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); + return false; + } + buf += size * 2 * sizeof(uint16_t); + + bool is_ok = ra_init_with_capacity(answer, size); + if (!is_ok) { + fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n"); + return false; + } + + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); + answer->keys[k] = tmp; + } + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + *readbytes += size * 4; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Ran out of bytes while reading offsets.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + + // skipping the offsets + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + // we check that the read is allowed + size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + *readbytes += containersize; + if(*readbytes > maxbytes) { + fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + bitset_container_t *c = bitset_container_create(); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += bitset_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = BITSET_CONTAINER_TYPE; + } else if (isrun) { + // we check that the read is allowed + *readbytes += sizeof(uint16_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + uint16_t n_runs; 
+ memcpy(&n_runs, buf, sizeof(uint16_t)); + size_t containersize = n_runs * sizeof(rle16_t); + *readbytes += containersize; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Running out of bytes while reading a run container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + + run_container_t *c = run_container_create(); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for a run container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += run_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = RUN_CONTAINER_TYPE; + } else { + // we check that the read is allowed + size_t containersize = thiscard * sizeof(uint16_t); + *readbytes += containersize; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Running out of bytes while reading an array container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + array_container_t *c = + array_container_create_given_capacity(thiscard); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for an array container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += array_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = ARRAY_CONTAINER_TYPE; + } + } + return true; } #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif -/* end file src/bitset_util.c */ +/* end file src/roaring_array.c */ +/* begin file src/roaring_priority_queue.c */ + + +#ifdef __cplusplus +using namespace ::roaring::internal; + +extern "C" { namespace roaring { namespace api { +#endif + +struct roaring_pq_element_s { + uint64_t size; + bool is_temporary; + roaring_bitmap_t *bitmap; +}; + +typedef struct roaring_pq_element_s roaring_pq_element_t; + +struct roaring_pq_s { + roaring_pq_element_t *elements; + uint64_t size; +}; + +typedef struct roaring_pq_s roaring_pq_t; + +static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { + return t1->size < t2->size; +} + +static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { + uint64_t i = pq->size; + pq->elements[pq->size++] = *t; + while (i > 0) { + uint64_t p = (i - 1) >> 1; + roaring_pq_element_t ap = pq->elements[p]; + if (!compare(t, &ap)) break; + pq->elements[i] = ap; + i = p; + } + pq->elements[i] = *t; +} + +static void pq_free(roaring_pq_t *pq) { + roaring_free(pq); +} + +static void percolate_down(roaring_pq_t *pq, uint32_t i) { + uint32_t size = (uint32_t)pq->size; + uint32_t hsize = size >> 1; + roaring_pq_element_t ai = pq->elements[i]; + while (i < hsize) { + uint32_t l = (i << 1) + 1; + uint32_t r = l + 1; + roaring_pq_element_t bestc = pq->elements[l]; + if (r < size) { + if (compare(pq->elements + r, &bestc)) { + l = r; + bestc = pq->elements[r]; + } + } + if (!compare(&bestc, &ai)) { + break; + } + pq->elements[i] = bestc; + i = l; + } + pq->elements[i] = ai; +} + +static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { + size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; + roaring_pq_t *answer = (roaring_pq_t 
*)roaring_malloc(alloc_size); + answer->elements = (roaring_pq_element_t *)(answer + 1); + answer->size = length; + for (uint32_t i = 0; i < length; i++) { + answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; + answer->elements[i].is_temporary = false; + answer->elements[i].size = + roaring_bitmap_portable_size_in_bytes(arr[i]); + } + for (int32_t i = (length >> 1); i >= 0; i--) { + percolate_down(answer, i); + } + return answer; +} + +static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { + roaring_pq_element_t ans = *pq->elements; + if (pq->size > 1) { + pq->elements[0] = pq->elements[--pq->size]; + percolate_down(pq, 0); + } else + --pq->size; + // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; + return ans; +} + +// this function consumes and frees the inputs +static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, + roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = ra_get_size(&x1->high_low_container), + length2 = ra_get_size(&x2->high_low_container); + if (0 == length1) { + roaring_bitmap_free(x1); + return x2; + } + if (0 == length2) { + roaring_bitmap_free(x2); + return x1; + } + uint32_t neededcap = length1 > length2 ? length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + // todo: unsharing can be inefficient as it may create a clone where + // none + // is needed, but it has the benefit of being easy to reason about. + + ra_unshare_container_at_index(&x1->high_low_container, pos1); + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + assert(type1 != SHARED_CONTAINER_TYPE); + + ra_unshare_container_at_index(&x2->high_low_container, pos2); + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + assert(type2 != SHARED_CONTAINER_TYPE); + + container_t *c; + + if ((type2 == BITSET_CONTAINER_TYPE) && + (type1 != BITSET_CONTAINER_TYPE) + ){ + c = container_lazy_ior(c2, type2, c1, type1, &result_type); + container_free(c1, type1); + if (c != c2) { + container_free(c2, type2); + } + } else { + c = container_lazy_ior(c1, type1, c2, type2, &result_type); + container_free(c2, type2); + if (c != c1) { + container_free(c1, type1); + } + } + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, pos1, &type1); + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, pos2, &type2); + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_move_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2); + } 
else if (pos2 == length2) { + ra_append_move_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1); + } + ra_clear_without_containers(&x1->high_low_container); + ra_clear_without_containers(&x2->high_low_container); + roaring_free(x1); + roaring_free(x2); + return answer; +} + +/** + * Compute the union of 'number' bitmaps using a heap. This can + * sometimes be faster than roaring_bitmap_or_many which uses + * a naive algorithm. Caller is responsible for freeing the + * result. + */ +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); + } + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_pq_t *pq = create_pq(x, number); + while (pq->size > 1) { + roaring_pq_element_t x1 = pq_poll(pq); + roaring_pq_element_t x2 = pq_poll(pq); + + if (x1.is_temporary && x2.is_temporary) { + roaring_bitmap_t *newb = + lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); + // should normally return a fresh new bitmap *except* that + // it can return x1.bitmap or x2.bitmap in degenerate cases + bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = temporary, .bitmap = newb}; + pq_add(pq, &newelement); + } else if (x2.is_temporary) { + roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); + x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); + pq_add(pq, &x2); + } else if (x1.is_temporary) { + roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); + x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); + + pq_add(pq, &x1); + } else { + roaring_bitmap_t *newb = + roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = true, .bitmap = newb}; + + pq_add(pq, &newelement); + } + } + roaring_pq_element_t X = pq_poll(pq); + roaring_bitmap_t *answer = X.bitmap; + roaring_bitmap_repair_after_lazy(answer); + pq_free(pq); + return answer; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif +/* end file src/roaring_priority_queue.c */ |
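For reference, a minimal sketch of how the heap-based union above and the lazy OR / repair pattern are typically driven through the public API shipped in this amalgamation. The include path below follows nDPI's third_party layout and is an assumption; other trees may install the header as <roaring/roaring.h>. The values are arbitrary.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "third_party/include/roaring.h" /* assumed include path (nDPI layout) */

int main(void) {
    /* build three overlapping bitmaps */
    roaring_bitmap_t *parts[3];
    for (uint32_t i = 0; i < 3; i++) {
        parts[i] = roaring_bitmap_create();
        for (uint32_t v = 0; v < 1000; v++)
            roaring_bitmap_add(parts[i], i * 500 + v);
    }

    /* heap-based union: repeatedly merges the two smallest inputs */
    roaring_bitmap_t *u1 =
        roaring_bitmap_or_many_heap(3, (const roaring_bitmap_t **)parts);

    /* same result via lazy OR; cardinalities are only repaired at the end */
    roaring_bitmap_t *u2 = roaring_bitmap_copy(parts[0]);
    for (uint32_t i = 1; i < 3; i++)
        roaring_bitmap_lazy_or_inplace(u2, parts[i], true);
    roaring_bitmap_repair_after_lazy(u2);

    printf("heap union: %llu values, lazy union: %llu values\n",
           (unsigned long long)roaring_bitmap_get_cardinality(u1),
           (unsigned long long)roaring_bitmap_get_cardinality(u2));

    roaring_bitmap_free(u1);
    roaring_bitmap_free(u2);
    for (uint32_t i = 0; i < 3; i++) roaring_bitmap_free(parts[i]);
    return 0;
}

roaring_bitmap_or_many_heap always merges the two smallest pending bitmaps first, which keeps intermediate results small, while the lazy variants defer cardinality bookkeeping until roaring_bitmap_repair_after_lazy is called on the final result.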