author     Luca Deri <deri@ntop.org>  2023-02-12 12:50:05 +0100
committer  Luca Deri <deri@ntop.org>  2023-02-12 12:50:24 +0100
commit     bf413afba1b79685caf1ccade5f984c2d6e92e3c (patch)
tree       50d46f7a5e68496dcd8734ada5296b6b47a4160f
parent     ba4e145aad4c7dbd1cbc6d2a6557f3686447d96a (diff)
Update roaring bitmap code
-rw-r--r--  src/lib/ndpi_bitmap.c                                                                 |     1
-rw-r--r--  src/lib/third_party/include/roaring.h                                                 |   379
-rw-r--r--  src/lib/third_party/src/roaring.c (renamed from src/lib/third_party/src/roaring.cc)   | 17848
3 files changed, 9442 insertions, 8786 deletions
diff --git a/src/lib/ndpi_bitmap.c b/src/lib/ndpi_bitmap.c
index 499b1342a..cf23b3f34 100644
--- a/src/lib/ndpi_bitmap.c
+++ b/src/lib/ndpi_bitmap.c
@@ -36,7 +36,6 @@
#include "ndpi_encryption.h"
#include "third_party/include/roaring.h"
-#include "third_party/src/roaring.cc"
/* ******************************************* */
diff --git a/src/lib/third_party/include/roaring.h b/src/lib/third_party/include/roaring.h
index 2d5bb856f..117f861b4 100644
--- a/src/lib/third_party/include/roaring.h
+++ b/src/lib/third_party/include/roaring.h
@@ -1,8 +1,12 @@
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
-// Created by amalgamation.sh on Mer 25 Ago 2021 04:24:41 CEST
+// Created by amalgamation.sh on 2023-02-12T11:34:02Z
/*
- * Copyright 2016-2020 The CRoaring authors
+ * The CRoaring project is under a dual license (Apache/MIT).
+ * Users of the library may choose one or the other license.
+ */
+/*
+ * Copyright 2016-2022 The CRoaring authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,16 +22,47 @@
*
* SPDX-License-Identifier: Apache-2.0
*/
+/*
+ * MIT License
+ *
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Permission is hereby granted, free of charge, to any
+ * person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software
+ * is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+ * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+ * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
/* begin file include/roaring/roaring_version.h */
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
#ifndef ROARING_INCLUDE_ROARING_VERSION
#define ROARING_INCLUDE_ROARING_VERSION
-#define ROARING_VERSION = 0.3.4,
+#define ROARING_VERSION "0.9.6"
enum {
ROARING_VERSION_MAJOR = 0,
- ROARING_VERSION_MINOR = 3,
- ROARING_VERSION_REVISION = 4
+ ROARING_VERSION_MINOR = 9,
+ ROARING_VERSION_REVISION = 6
};
#endif // ROARING_INCLUDE_ROARING_VERSION
/* end file include/roaring/roaring_version.h */
@@ -68,12 +103,6 @@ extern "C" { namespace roaring { namespace api {
#define ROARING_CONTAINER_T void // no compile-time checking
#endif
-
-#define MAX_CONTAINERS 65536
-
-#define SERIALIZATION_ARRAY_UINT32 1
-#define SERIALIZATION_CONTAINER 2
-
#define ROARING_FLAG_COW UINT8_C(0x1)
#define ROARING_FLAG_FROZEN UINT8_C(0x2)
@@ -170,7 +199,7 @@ typedef struct roaring_bitmap_s {
* Capacity is a performance hint for how many "containers" the data will need.
* Client is responsible for calling `roaring_bitmap_free()`.
*/
-static roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
/**
* Dynamically allocates a new bitmap (initially empty).
@@ -185,7 +214,7 @@ static inline roaring_bitmap_t *roaring_bitmap_create(void)
* Capacity is a performance hint for how many "containers" the data will need.
* Can return false if auxiliary allocations fail when capacity greater than 0.
*/
-static bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
/**
* Initialize a roaring bitmap structure in memory controlled by client.
@@ -199,13 +228,13 @@ static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
* Add all the values between min (included) and max (excluded) that are at a
* distance k*step from min.
*/
-static roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
uint32_t step);
/**
* Creates a new bitmap from a pointer of uint32_t integers
*/
-static roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
/*
* Whether you want to use copy-on-write.
@@ -228,21 +257,23 @@ static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
}
}
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+ int64_t offset);
/**
* Describe the inner structure of the bitmap.
*/
-static void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
/**
* Creates a new bitmap from a list of uint32_t integers
*/
-static roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
/**
* Copies a bitmap (this does memory allocation).
* The caller is responsible for memory management.
*/
-static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
/**
* Copies a bitmap from src to dest. It is assumed that the pointer dest
@@ -252,37 +283,42 @@ static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
* It might be preferable and simpler to call roaring_bitmap_copy except
* that roaring_bitmap_overwrite can save on memory allocations.
*/
-static bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
const roaring_bitmap_t *src);
/**
* Print the content of the bitmap.
*/
-static void roaring_bitmap_printf(const roaring_bitmap_t *r);
+void roaring_bitmap_printf(const roaring_bitmap_t *r);
/**
* Computes the intersection between two bitmaps and returns new bitmap. The
* caller is responsible for memory management.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ * You may also rely on roaring_bitmap_and_inplace to avoid creating
+ * many temporary bitmaps.
*/
-static roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Computes the size of the intersection between two bitmaps.
*/
-static uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1,
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Check whether two bitmaps intersect.
*/
-static bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
+bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Check whether a bitmap and a closed range intersect.
*/
-static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
uint64_t x, uint64_t y);
/**
@@ -291,46 +327,49 @@ static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
*
* The Jaccard index is undefined if both bitmaps are empty.
*/
-static double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1,
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
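A minimal sketch (illustrative, not part of this patch) of the equivalent formulation |r1 AND r2| / |r1 OR r2| using the cardinality helpers declared in this header; the function name is hypothetical:

    #include "roaring.h"
    /* Equivalent to roaring_bitmap_jaccard_index(r1, r2) whenever at least one
     * bitmap is non-empty; the both-empty case is mapped to 0 by convention here. */
    double jaccard_via_cardinalities(const roaring_bitmap_t *r1,
                                     const roaring_bitmap_t *r2) {
        double inter = (double)roaring_bitmap_and_cardinality(r1, r2);
        double uni   = (double)roaring_bitmap_or_cardinality(r1, r2);
        return uni > 0.0 ? inter / uni : 0.0;
    }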
/**
* Computes the size of the union between two bitmaps.
*/
-static uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1,
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Computes the size of the difference (andnot) between two bitmaps.
*/
-static uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1,
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Computes the size of the symmetric difference (xor) between two bitmaps.
*/
-static uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Inplace version of `roaring_bitmap_and()`, modifies r1
- * r1 == r2 is allowed
+ * r1 == r2 is allowed.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
*/
-static void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
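A minimal sketch (illustrative, not part of this patch) of the performance hint above: intersecting several bitmaps two-by-two, starting from the smallest one and using the in-place variant to avoid temporary bitmaps; `intersect_all` and its arguments are hypothetical:

    #include "roaring.h"
    roaring_bitmap_t *intersect_all(const roaring_bitmap_t **bitmaps, size_t n) {
        /* assumes bitmaps[0] is (or has been sorted to be) the smallest one */
        roaring_bitmap_t *acc = roaring_bitmap_copy(bitmaps[0]);
        for (size_t i = 1; acc != NULL && i < n && !roaring_bitmap_is_empty(acc); i++)
            roaring_bitmap_and_inplace(acc, bitmaps[i]);
        return acc;  /* caller frees the result with roaring_bitmap_free() */
    }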
/**
* Computes the union between two bitmaps and returns new bitmap. The caller is
* responsible for memory management.
*/
-static roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Inplace version of `roaring_bitmap_or()`, modifies r1.
* TODO: decide whether r1 == r2 ok
*/
-static void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
@@ -338,7 +377,7 @@ static void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
* Caller is responsible for freeing the result.
* See also `roaring_bitmap_or_many_heap()`
*/
-static roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
const roaring_bitmap_t **rs);
/**
@@ -346,40 +385,40 @@ static roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
* faster than `roaring_bitmap_or_many()` which uses a naive algorithm.
* Caller is responsible for freeing the result.
*/
-static roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
const roaring_bitmap_t **rs);
/**
* Computes the symmetric difference (xor) between two bitmaps
* and returns new bitmap. The caller is responsible for memory management.
*/
-static roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2.
*/
-static void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Compute the xor of 'number' bitmaps.
* Caller is responsible for freeing the result.
*/
-static roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
const roaring_bitmap_t **rs);
/**
* Computes the difference (andnot) between two bitmaps and returns new bitmap.
* Caller is responsible for freeing the result.
*/
-static roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2.
*/
-static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
@@ -396,30 +435,69 @@ static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
/**
* Frees the memory.
*/
-static void roaring_bitmap_free(const roaring_bitmap_t *r);
+void roaring_bitmap_free(const roaring_bitmap_t *r);
+
+/**
+ * A bit of context usable with `roaring_bitmap_*_bulk()` functions
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap
+ * (unless re-initialized to zero), and any modification to a bitmap
+ * (other than modifications performed with `_bulk()` functions with the context
+ * passed) will invalidate any contexts associated with that bitmap.
+ */
+typedef struct roaring_bulk_context_s {
+ ROARING_CONTAINER_T *container;
+ int idx;
+ uint16_t key;
+ uint8_t typecode;
+} roaring_bulk_context_t;
+
+/**
+ * Add an item, using context from a previous insert for speed optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context, uint32_t val);
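A minimal usage sketch (illustrative, not part of this patch) for the new bulk-add API; `values`/`count` are hypothetical inputs:

    #include "roaring.h"
    void add_all_bulk(roaring_bitmap_t *r, const uint32_t *values, size_t count) {
        roaring_bulk_context_t context = {0};  /* must be zero-initialized before first use */
        for (size_t i = 0; i < count; i++)
            roaring_bitmap_add_bulk(r, &context, values[i]);
        /* values sharing the same high 16 bits benefit most when added consecutively */
    }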
/**
* Add n_args values from pointer vals, faster than repeatedly calling
* `roaring_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same "key" (high 16 bits of the value) as consecutive
+ * elements in `vals`
*/
-static void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
const uint32_t *vals);
/**
* Add value x
*/
-static void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
/**
* Add value x
* Returns true if a new value was added, false if the value already existed.
*/
-static bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
/**
* Add all values in range [min, max]
*/
-static void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
uint32_t min, uint32_t max);
/**
@@ -434,12 +512,12 @@ static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
/**
* Remove value x
*/
-static void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
/**
* Remove all values in range [min, max]
*/
-static void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
uint32_t min, uint32_t max);
/**
@@ -454,44 +532,63 @@ static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
/**
* Remove multiple values
*/
-static void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
const uint32_t *vals);
/**
* Remove value x
* Returns true if a new value was removed, false if the value was not present.
*/
-static bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
/**
* Check if value is present
*/
-static bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
/**
* Check whether a range of values from range_start (included)
* to range_end (excluded) is present
*/
-static bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
uint64_t range_start,
uint64_t range_end);
/**
+ * Check if an item is present, using context from a previous insert for speed
+ * optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val);
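A minimal sketch (illustrative, not part of this patch) of the matching bulk lookup, reusing one zero-initialized context across queries; names are hypothetical:

    #include "roaring.h"
    size_t count_present(const roaring_bitmap_t *r, const uint32_t *values, size_t count) {
        roaring_bulk_context_t context = {0};
        size_t hits = 0;
        for (size_t i = 0; i < count; i++)
            if (roaring_bitmap_contains_bulk(r, &context, values[i]))
                hits++;
        return hits;
    }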
+
+/**
* Get the cardinality of the bitmap (number of elements).
*/
-static uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r);
+uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r);
/**
* Returns the number of elements in the range [range_start, range_end).
*/
-static uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
uint64_t range_start,
uint64_t range_end);
/**
* Returns true if the bitmap is empty (cardinality is zero).
*/
-static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
+bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
/**
@@ -499,20 +596,20 @@ static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
* was initialized in client memory via roaring_bitmap_init(), then a call to
* roaring_bitmap_clear() would be enough to "free" it)
*/
-static void roaring_bitmap_clear(roaring_bitmap_t *r);
+void roaring_bitmap_clear(roaring_bitmap_t *r);
/**
- * Convert the bitmap to an array, output in `ans`,
+ * Convert the bitmap to a sorted array, output in `ans`.
*
* Caller is responsible to ensure that there is enough memory allocated, e.g.
*
* ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t));
*/
-static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
+void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
/**
- * Convert the bitmap to an array from `offset` by `limit`, output in `ans`.
+ * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
*
* Caller is responsible to ensure that there is enough memory allocated, e.g.
*
@@ -520,7 +617,7 @@ static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *
*
* Return false in case of failure (e.g., insufficient memory)
*/
-static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
size_t offset, size_t limit,
uint32_t *ans);
@@ -528,7 +625,7 @@ static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
* Remove run-length encoding even when it is more space efficient.
* Return whether a change was applied.
*/
-static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
/**
* Convert array and bitmap containers to run containers when it is more
@@ -537,13 +634,13 @@ static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
* Returns true if the result has at least one run container.
* Additional savings might be possible by calling `shrinkToFit()`.
*/
-static bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
+bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
/**
* If needed, reallocate memory to shrink the memory usage.
* Returns the number of bytes saved.
*/
-static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
+size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
/**
* Write the bitmap to an output pointer, this output buffer should refer to
@@ -554,22 +651,28 @@ static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
* more space efficient than the portable form, e.g. when the data is sparse.
*
* Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
+size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
/**
* Use with `roaring_bitmap_serialize()`.
*
* (See `roaring_bitmap_portable_deserialize()` if you want a format that's
- * compatible with Java and Go implementations)
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
+roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
/**
* How many bytes are required to serialize this bitmap (NOT compatible
* with Java and Go versions)
*/
-static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
+size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
/**
* Read bitmap from a serialized buffer.
@@ -581,8 +684,11 @@ static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
*
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
+roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
/**
* Read bitmap from a serialized buffer safely (reading up to maxbytes).
@@ -590,18 +696,42 @@ static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
*
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
size_t maxbytes);
/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * The function is unsafe in the following ways:
+ * 1) It may execute unaligned memory accesses.
+ * 2) A buffer overflow may occur if buf does not point to a valid serialized
+ * bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
+
+/**
* Check how many bytes would be read (up to maxbytes) at this pointer if there
* is a bitmap, returns zero if there is no valid bitmap.
*
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*/
-static size_t roaring_bitmap_portable_deserialize_size(const char *buf,
+size_t roaring_bitmap_portable_deserialize_size(const char *buf,
size_t maxbytes);
/**
@@ -610,7 +740,7 @@ static size_t roaring_bitmap_portable_deserialize_size(const char *buf,
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*/
-static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
+size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
/**
* Write a bitmap to a char buffer. The output buffer should refer to at least
@@ -621,8 +751,11 @@ static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
*
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
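A minimal sketch (illustrative, not part of this patch) of a round trip through the portable, Java/Go-compatible format on a little-endian host; `portable_roundtrip` is a hypothetical helper:

    #include <stdlib.h>
    #include "roaring.h"
    roaring_bitmap_t *portable_roundtrip(const roaring_bitmap_t *r) {
        size_t len = roaring_bitmap_portable_size_in_bytes(r);
        char *buf = (char *)malloc(len);
        if (buf == NULL) return NULL;
        roaring_bitmap_portable_serialize(r, buf);
        roaring_bitmap_t *copy = roaring_bitmap_portable_deserialize_safe(buf, len);
        free(buf);
        return copy;  /* NULL on failure; caller frees with roaring_bitmap_free() */
    }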
/*
* "Frozen" serialization format imitates memory layout of roaring_bitmap_t.
@@ -646,13 +779,16 @@ static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char
/**
* Returns number of bytes required to serialize bitmap using frozen format.
*/
-static size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
+size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
/**
* Serializes bitmap using frozen format.
* Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
+void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
/**
* Creates constant bitmap that is a view of a given buffer.
@@ -664,8 +800,11 @@ static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf
* Bitmap returned by this function can be used in all readonly contexts.
* Bitmap must be freed as usual, by calling roaring_bitmap_free().
* Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
*/
-static const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
+const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
size_t length);
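A minimal sketch (illustrative, not part of this patch) of building a read-only frozen view; it assumes, per the upstream frozen-format documentation, that the backing buffer must be 32-byte aligned and must outlive the view, and it uses roaring_aligned_malloc() from the new include/roaring/memory.h (alignment first, then size):

    #include "roaring.h"
    const roaring_bitmap_t *make_frozen_view(const roaring_bitmap_t *r, char **out_buf) {
        size_t len = roaring_bitmap_frozen_size_in_bytes(r);
        char *buf = (char *)roaring_aligned_malloc(32, len);
        if (buf == NULL) return NULL;
        roaring_bitmap_frozen_serialize(r, buf);
        *out_buf = buf;  /* keep alive while the view is used; release with roaring_aligned_free() */
        return roaring_bitmap_frozen_view(buf, len);
    }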
/**
@@ -681,29 +820,29 @@ static const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
*
* Iteration is ordered: from the smallest to the largest elements.
*/
-static bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
void *ptr);
-static bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
uint64_t high_bits, void *ptr);
/**
* Return true if the two bitmaps contain the same elements.
*/
-static bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Return true if all the elements of r1 are also in r2.
*/
-static bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
* Return true if all the elements of r1 are also in r2, and r2 is strictly
* greater than r1.
*/
-static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
@@ -721,7 +860,7 @@ static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
* `bitsetconversion` is a flag which determines whether container-container
* operations force a bitset conversion.
*/
-static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2,
const bool bitsetconversion);
@@ -733,7 +872,7 @@ static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
* `bitsetconversion` is a flag which determines whether container-container
* operations force a bitset conversion.
*/
-static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2,
const bool bitsetconversion);
@@ -743,7 +882,7 @@ static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
* Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()`
* or modified with `roaring_bitmap_lazy_or_inplace()`.
*/
-static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
/**
* Computes the symmetric difference between two bitmaps and returns new bitmap.
@@ -756,7 +895,7 @@ static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
* It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on
* the result.
*/
-static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
@@ -764,7 +903,7 @@ static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
*
* Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2
*/
-static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
+void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
@@ -772,7 +911,7 @@ static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
* The number of negated values is range_end - range_start.
* Areas outside the range are passed through unchanged.
*/
-static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
uint64_t range_start, uint64_t range_end);
/**
@@ -781,7 +920,7 @@ static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
* range_end - range_start.
* Areas outside the range are passed through unchanged.
*/
-static void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start,
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start,
uint64_t range_end);
/**
@@ -790,7 +929,7 @@ static void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_sta
* function returns true and sets element to the element of given rank.
* Otherwise, it returns false.
*/
-static bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
+bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
uint32_t *element);
/**
@@ -803,17 +942,17 @@ static bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
* as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
* the smallest value.
*/
-static uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
/**
* Returns the smallest value in the set, or UINT32_MAX if the set is empty.
*/
-static uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
/**
* Returns the greatest value in the set, or 0 if the set is empty.
*/
-static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
/**
* (For advanced users.)
@@ -821,7 +960,7 @@ static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
* Collect statistics about the bitmap, see roaring_types.h for
* a description of roaring_statistics_t
*/
-static void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
roaring_statistics_t *stat);
/*********************
@@ -865,7 +1004,7 @@ typedef struct roaring_uint32_iterator_s {
* values. If there is a value, then this iterator points to the first value
* and `it->has_value` is true. The value is in `it->current_value`.
*/
-static void roaring_init_iterator(const roaring_bitmap_t *r,
+void roaring_init_iterator(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit);
/**
@@ -873,7 +1012,7 @@ static void roaring_init_iterator(const roaring_bitmap_t *r,
* values. If there is a value, then this iterator points to the last value
* and `it->has_value` is true. The value is in `it->current_value`.
*/
-static void roaring_init_iterator_last(const roaring_bitmap_t *r,
+void roaring_init_iterator_last(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit);
/**
@@ -884,41 +1023,41 @@ static void roaring_init_iterator_last(const roaring_bitmap_t *r,
* If there is a value, then this iterator points to the first value and
* `it->has_value` is true. The value is in `it->current_value`.
*/
-static roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
+roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
/**
* Advance the iterator. If there is a new value, then `it->has_value` is true.
* The new value is in `it->current_value`. Values are traversed in increasing
* orders. For convenience, returns `it->has_value`.
*/
-static bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
+bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
/**
* Decrement the iterator. If there's a new value, then `it->has_value` is true.
* The new value is in `it->current_value`. Values are traversed in decreasing
* order. For convenience, returns `it->has_value`.
*/
-static bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
+bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
/**
* Move the iterator to the first value >= `val`. If there is such a value,
* then `it->has_value` is true. The new value is in `it->current_value`.
* For convenience, returns `it->has_value`.
*/
-static bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
uint32_t val);
/**
* Creates a copy of an iterator.
* Caller must free it.
*/
-static roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
const roaring_uint32_iterator_t *it);
/**
* Free memory following `roaring_create_iterator()`
*/
-static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
+void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
/*
* Reads next ${count} values from iterator into user-supplied ${buf}.
@@ -930,7 +1069,7 @@ static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
* - first value is copied from ${it}->current_value
* - after function returns, iterator is positioned at the next element
*/
-static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
+uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
uint32_t* buf, uint32_t count);
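A minimal sketch (illustrative, not part of this patch) of the iterator API above, visiting all values in increasing order; `sum_values` is a hypothetical helper:

    #include "roaring.h"
    uint64_t sum_values(const roaring_bitmap_t *r) {
        uint64_t sum = 0;
        roaring_uint32_iterator_t *it = roaring_create_iterator(r);
        while (it->has_value) {            /* false once the bitmap is exhausted */
            sum += it->current_value;
            roaring_advance_uint32_iterator(it);
        }
        roaring_free_uint32_iterator(it);
        return sum;
    }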
#ifdef __cplusplus
@@ -955,5 +1094,45 @@ static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
using namespace ::roaring::api;
#endif
#endif
-
/* end file include/roaring/roaring.h */
+/* begin file include/roaring/memory.h */
+#ifndef INCLUDE_ROARING_MEMORY_H_
+#define INCLUDE_ROARING_MEMORY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> // for size_t
+
+typedef void* (*roaring_malloc_p)(size_t);
+typedef void* (*roaring_realloc_p)(void*, size_t);
+typedef void* (*roaring_calloc_p)(size_t, size_t);
+typedef void (*roaring_free_p)(void*);
+typedef void* (*roaring_aligned_malloc_p)(size_t, size_t);
+typedef void (*roaring_aligned_free_p)(void*);
+
+typedef struct roaring_memory_s {
+ roaring_malloc_p malloc;
+ roaring_realloc_p realloc;
+ roaring_calloc_p calloc;
+ roaring_free_p free;
+ roaring_aligned_malloc_p aligned_malloc;
+ roaring_aligned_free_p aligned_free;
+} roaring_memory_t;
+
+void roaring_init_memory_hook(roaring_memory_t memory_hook);
+
+void* roaring_malloc(size_t);
+void* roaring_realloc(void*, size_t);
+void* roaring_calloc(size_t, size_t);
+void roaring_free(void*);
+void* roaring_aligned_malloc(size_t, size_t);
+void roaring_aligned_free(void*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INCLUDE_ROARING_MEMORY_H_
+/* end file include/roaring/memory.h */
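A minimal sketch (illustrative, not part of this patch) of installing custom allocators through the new memory hooks; the my_* wrappers are hypothetical functions with the signatures required by roaring_memory_t:

    #include <stddef.h>
    #include "roaring.h"
    extern void *my_malloc(size_t);
    extern void *my_realloc(void *, size_t);
    extern void *my_calloc(size_t, size_t);
    extern void  my_free(void *);
    extern void *my_aligned_malloc(size_t alignment, size_t size);
    extern void  my_aligned_free(void *);

    void install_memory_hooks(void) {
        roaring_memory_t hooks = {
            .malloc         = my_malloc,
            .realloc        = my_realloc,
            .calloc         = my_calloc,
            .free           = my_free,
            .aligned_malloc = my_aligned_malloc,
            .aligned_free   = my_aligned_free,
        };
        roaring_init_memory_hook(hooks);  /* the hook struct is passed by value */
    }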
diff --git a/src/lib/third_party/src/roaring.cc b/src/lib/third_party/src/roaring.c
index 778d36004..58c1ea78c 100644
--- a/src/lib/third_party/src/roaring.cc
+++ b/src/lib/third_party/src/roaring.c
@@ -1,8 +1,12 @@
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
-// Created by amalgamation.sh on Mer 25 Ago 2021 04:24:41 CEST
+// Created by amalgamation.sh on 2023-02-12T11:34:02Z
/*
- * Copyright 2016-2020 The CRoaring authors
+ * The CRoaring project is under a dual license (Apache/MIT).
+ * Users of the library may choose one or the other license.
+ */
+/*
+ * Copyright 2016-2022 The CRoaring authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +22,37 @@
*
* SPDX-License-Identifier: Apache-2.0
*/
+/*
+ * MIT License
+ *
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Permission is hereby granted, free of charge, to any
+ * person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software
+ * is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+ * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+ * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
#include "roaring.h"
@@ -27,262 +62,53 @@
#endif
#include "roaring.h" /* include public API definitions */
-/* begin file include/roaring/isadetection.h */
-/* From
-https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
-Highly modified.
-
-Copyright (c) 2016- Facebook, Inc (Adam Paszke)
-Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
-Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
-Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
-Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
-Copyright (c) 2011-2013 NYU (Clement Farabet)
-Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
-Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
-(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
-Samy Bengio, Johnny Mariethoz)
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
-America and IDIAP Research Institute nor the names of its contributors may be
- used to endorse or promote products derived from this software without
- specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef ROARING_ISADETECTION_H
-#define ROARING_ISADETECTION_H
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#if defined(_MSC_VER)
-#include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
-#include <cpuid.h>
-#endif // defined(_MSC_VER)
-
-
-enum croaring_instruction_set {
- CROARING_DEFAULT = 0x0,
- CROARING_NEON = 0x1,
- CROARING_AVX2 = 0x4,
- CROARING_SSE42 = 0x8,
- CROARING_PCLMULQDQ = 0x10,
- CROARING_BMI1 = 0x20,
- CROARING_BMI2 = 0x40,
- CROARING_ALTIVEC = 0x80,
- CROARING_UNINITIALIZED = 0x8000
-};
-
-#if defined(__PPC64__)
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- return CROARING_ALTIVEC;
-}
-
-#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
-
-#if defined(__ARM_NEON)
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- return CROARING_NEON;
-}
-
-#else // ARM without NEON
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- return CROARING_DEFAULT;
-}
-
-#endif
-
-#elif defined(__x86_64__) || defined(_M_AMD64) // x64
-
-
-
-
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
- uint32_t *edx) {
-
-#if defined(_MSC_VER)
- int cpu_info[4];
- __cpuid(cpu_info, *eax);
- *eax = cpu_info[0];
- *ebx = cpu_info[1];
- *ecx = cpu_info[2];
- *edx = cpu_info[3];
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
- uint32_t level = *eax;
- __get_cpuid(level, eax, ebx, ecx, edx);
-#else
- uint32_t a = *eax, b, c = *ecx, d;
- __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
- *eax = a;
- *ebx = b;
- *ecx = c;
- *edx = d;
-#endif
-}
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- uint32_t eax, ebx, ecx, edx;
- uint32_t host_isa = 0x0;
- // Can be found on Intel ISA Reference for CPUID
- static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7
- static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7
- static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7
- static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1
- static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
- // ECX for EAX=0x7
- eax = 0x7;
- ecx = 0x0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if (ebx & cpuid_avx2_bit) {
- host_isa |= CROARING_AVX2;
- }
- if (ebx & cpuid_bmi1_bit) {
- host_isa |= CROARING_BMI1;
- }
-
- if (ebx & cpuid_bmi2_bit) {
- host_isa |= CROARING_BMI2;
- }
-
- // EBX for EAX=0x1
- eax = 0x1;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- if (ecx & cpuid_sse42_bit) {
- host_isa |= CROARING_SSE42;
- }
-
- if (ecx & cpuid_pclmulqdq_bit) {
- host_isa |= CROARING_PCLMULQDQ;
- }
-
- return host_isa;
-}
-#else // fallback
-
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- return CROARING_DEFAULT;
-}
-
-
-#endif // end SIMD extension detection code
-
-
-#if defined(__x86_64__) || defined(_M_AMD64) // x64
-
-#if defined(__cplusplus)
-#include <atomic>
-static inline uint32_t croaring_detect_supported_architectures() {
- static std::atomic<int> buffer{CROARING_UNINITIALIZED};
- if(buffer == CROARING_UNINITIALIZED) {
- buffer = dynamic_croaring_detect_supported_architectures();
- }
- return buffer;
-}
-#elif defined(_MSC_VER) && !defined(__clang__)
-// Visual Studio does not support C11 atomics.
-static inline uint32_t croaring_detect_supported_architectures() {
- static int buffer = CROARING_UNINITIALIZED;
- if(buffer == CROARING_UNINITIALIZED) {
- buffer = dynamic_croaring_detect_supported_architectures();
- }
- return buffer;
-}
-#else // defined(__cplusplus) and defined(_MSC_VER) && !defined(__clang__)
-#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
-#define ROARING_DISABLE_AVX
-#undef __AVX2__
-/* CentOS 7 */
-static inline uint32_t croaring_detect_supported_architectures() {
- return(dynamic_croaring_detect_supported_architectures());
-}
-#else
-#include <stdatomic.h>
-static inline uint32_t croaring_detect_supported_architectures() {
- static _Atomic int buffer = CROARING_UNINITIALIZED;
- if(buffer == CROARING_UNINITIALIZED) {
- buffer = dynamic_croaring_detect_supported_architectures();
- }
- return buffer;
-}
-#endif // (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
-#endif // defined(_MSC_VER) && !defined(__clang__)
-
-#ifdef ROARING_DISABLE_AVX
-static inline bool croaring_avx2() {
- return false;
-}
-#elif defined(__AVX2__)
-static inline bool croaring_avx2() {
- return true;
-}
-#else
-static inline bool croaring_avx2() {
- return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
-}
-#endif
-
-
-#else // defined(__x86_64__) || defined(_M_AMD64) // x64
-
-static inline bool croaring_avx2() {
- return false;
-}
-
-static inline uint32_t croaring_detect_supported_architectures() {
- // no runtime dispatch
- return dynamic_croaring_detect_supported_architectures();
-}
-#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
-
-#endif // ROARING_ISADETECTION_H
-/* end file include/roaring/isadetection.h */
/* begin file include/roaring/portability.h */
/*
* portability.h
*
*/
+ /**
+ * All macros should be prefixed with either CROARING or ROARING.
+ * The library uses both ROARING_ and CROARING_ as prefixes. The ROARING_ prefix is for
+ * macros that are provided by the build system or that are closely
+ * related to the format. The header macros may also use ROARING_.
+ * The CROARING_ prefix is for internal macros that a user is unlikely
+ * to ever interact with.
+ */
+
#ifndef INCLUDE_PORTABILITY_H_
#define INCLUDE_PORTABILITY_H_
#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
+#define _GNU_SOURCE 1
#endif // _GNU_SOURCE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS 1
#endif // __STDC_FORMAT_MACROS
-#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
+#ifdef _MSC_VER
+#define CROARING_VISUAL_STUDIO 1
+/**
+ * We want to differentiate carefully between
+ * clang under visual studio and regular visual
+ * studio.
+ */
+#ifdef __clang__
+// clang under visual studio
+#define CROARING_CLANG_VISUAL_STUDIO 1
+#else
+// just regular visual studio (best guess)
+#define CROARING_REGULAR_VISUAL_STUDIO 1
+#endif // __clang__
+#endif // _MSC_VER
+
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#undef _POSIX_C_SOURCE
+#endif
+
+#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
@@ -292,7 +118,7 @@ static inline uint32_t croaring_detect_supported_architectures() {
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above
-#if !(defined(__APPLE__)) && !(defined(__FreeBSD__))
+#ifdef __GLIBC__
#include <malloc.h> // this should never be needed but there are some reports that it is needed.
#endif
@@ -300,7 +126,7 @@ static inline uint32_t croaring_detect_supported_architectures() {
extern "C" { // portability definitions are in global scope, not a namespace
#endif
-#if defined(_MSC_VER) && !defined(__clang__) && !defined(WIN64) && !defined(ROARING_ACK_32BIT)
+#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT)
#pragma message( \
"You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.")
#endif
@@ -309,41 +135,72 @@ extern "C" { // portability definitions are in global scope, not a namespace
#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
#endif
-#if defined(_MSC_VER)
+#if CROARING_REGULAR_VISUAL_STUDIO
#define __restrict__ __restrict
-#endif // defined(_MSC_VER
+#endif // CROARING_REGULAR_VISUAL_STUDIO
#if defined(__x86_64__) || defined(_M_X64)
// we have an x64 processor
-#define CROARING_IS_X64
+#define CROARING_IS_X64 1
#if defined(_MSC_VER) && (_MSC_VER < 1910)
// Old visual studio systems won't support AVX2 well.
#undef CROARING_IS_X64
#endif
-#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
- /* RH 7 don't have atomic includes */
-#undef CROARING_IS_X64
-#endif
-
-
#if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__)
// Older versions of clang have a bug affecting us
// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
#undef CROARING_IS_X64
#endif
-#ifdef ROARING_DISABLE_X64
+#ifdef CROARING_DISABLE_X64
#undef CROARING_IS_X64
#endif
// we include the intrinsic header
-#ifndef _MSC_VER
+#if !CROARING_REGULAR_VISUAL_STUDIO
/* Non-Microsoft C/C++-compatible compiler */
#include <x86intrin.h> // on some recent GCC, this will declare posix_memalign
-#endif // _MSC_VER
+
+
+
+#ifdef CROARING_CLANG_VISUAL_STUDIO
+
+/**
+ * You are not supposed, normally, to include these
+ * headers directly. Instead you should either include intrin.h
+ * or x86intrin.h. However, when compiling with clang
+ * under Windows (i.e., when _MSC_VER is set), these headers
+ * only get included *if* the corresponding features are detected
+ * from macros:
+ * e.g., if __AVX2__ is set... in turn, we normally set these
+ * macros by compiling against the corresponding architecture
+ * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
+ * software with these advanced instructions. These headers would
+ * normally guard against such usage, but we carefully included
+ * <x86intrin.h> (or <intrin.h>) before, so the headers
+ * are fooled.
+ */
+#include <bmiintrin.h> // for _blsr_u64
+#include <lzcntintrin.h> // for __lzcnt64
+#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <wmmintrin.h>
+// unfortunately, we may not get _blsr_u64, but, thankfully, clang
+// has it as a macro.
+#ifndef _blsr_u64
+// we roll our own
+#define _blsr_u64(n) ((n - 1) & n)
+#endif // _blsr_u64
+#endif // CROARING_CLANG_VISUAL_STUDIO
+
+
+#endif // CROARING_REGULAR_VISUAL_STUDIO
#endif // defined(__x86_64__) || defined(_M_X64)
#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
@@ -353,14 +210,13 @@ extern "C" { // portability definitions are in global scope, not a namespace
# include <arm_neon.h>
#endif
-#ifndef _MSC_VER
+#if !CROARING_REGULAR_VISUAL_STUDIO
/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
* assembly */
-#define ROARING_INLINE_ASM
+#define CROARING_INLINE_ASM 1
#endif // _MSC_VER
-
-#ifdef _MSC_VER
+#if CROARING_REGULAR_VISUAL_STUDIO
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
@@ -371,9 +227,9 @@ extern "C" { // portability definitions are in global scope, not a namespace
/* wrappers for Visual Studio built-ins that look like gcc built-ins */
/* result might be undefined when input_num is zero */
-static inline int __builtin_ctzll(unsigned long long input_num) {
+inline int __builtin_ctzll(unsigned long long input_num) {
unsigned long index;
-#ifdef WIN64 // highly recommended!!!
+#ifdef _WIN64 // highly recommended!!!
_BitScanForward64(&index, input_num);
#else // if we must support 32-bit Windows
if ((uint32_t)input_num != 0) {
@@ -387,9 +243,9 @@ static inline int __builtin_ctzll(unsigned long long input_num) {
}
/* result might be undefined when input_num is zero */
-static inline int __builtin_clzll(unsigned long long input_num) {
+inline int __builtin_clzll(unsigned long long input_num) {
unsigned long index;
-#ifdef WIN64 // highly recommended!!!
+#ifdef _WIN64 // highly recommended!!!
_BitScanReverse64(&index, input_num);
#else // if we must support 32-bit Windows
if (input_num > 0xFFFFFFFF) {
@@ -422,45 +278,16 @@ static inline int __builtin_clzll(unsigned long long input_num) {
#endif
-// without the following, we get lots of warnings about posix_memalign
-#ifndef __cplusplus
-extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
-#endif //__cplusplus // C++ does not have a well defined signature
-
-// portable version of posix_memalign
-static inline void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
- void *p;
-#ifdef _MSC_VER
- p = _aligned_malloc(size, alignment);
-#elif defined(__MINGW32__) || defined(__MINGW64__)
- p = __mingw_aligned_malloc(size, alignment);
-#else
- // somehow, if this is used before including "x86intrin.h", it creates an
- // implicit defined warning.
- if (posix_memalign(&p, alignment, size) != 0) return NULL;
-#endif
- return p;
-}
-
-static inline void roaring_bitmap_aligned_free(void *memblock) {
-#ifdef _MSC_VER
- _aligned_free(memblock);
-#elif defined(__MINGW32__) || defined(__MINGW64__)
- __mingw_aligned_free(memblock);
-#else
- ndpi_free(memblock);
-#endif
-}
-
-#if defined(_MSC_VER)
+#if CROARING_REGULAR_VISUAL_STUDIO
#define ALIGNED(x) __declspec(align(x))
-#else
-#if defined(__GNUC__)
+#elif defined(__GNUC__) || defined(__clang__)
#define ALIGNED(x) __attribute__((aligned(x)))
-#endif
+#else
+#warning "Warning. Unrecognized compiler."
+#define ALIGNED(x)
#endif
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
#define WARN_UNUSED __attribute__((warn_unused_result))
#else
#define WARN_UNUSED
@@ -468,6 +295,10 @@ static inline void roaring_bitmap_aligned_free(void *memblock) {
#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
+#ifdef USENEON
+// we can always compute the popcount fast.
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
+// we will need this function:
static inline int hammingbackup(uint64_t x) {
uint64_t c1 = UINT64_C(0x5555555555555555);
uint64_t c2 = UINT64_C(0x3333333333333333);
@@ -477,16 +308,20 @@ static inline int hammingbackup(uint64_t x) {
x *= UINT64_C(0x0101010101010101);
return x >> 56;
}
+#endif
+
static inline int hamming(uint64_t x) {
-#if defined(WIN64) && defined(_MSC_VER) && !defined(__clang__)
-#ifdef _M_ARM64
+#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
+#ifdef USENEON
+ return vaddv_u8(vcnt_u8(vcreate_u8(x)));
+#elif defined(_M_ARM64)
return hammingbackup(x);
// (int) _CountOneBits64(x); is unavailable
#else // _M_ARM64
return (int) __popcnt64(x);
#endif // _M_ARM64
-#elif defined(WIN32) && defined(_MSC_VER) && !defined(__clang__)
+#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
#ifdef _M_ARM
return hammingbackup(x);
// _CountOneBits is unavailable
@@ -568,8 +403,287 @@ static inline int hamming(uint64_t x) {
#define CROARING_UNTARGET_REGION
#endif
+// Allow unaligned memory access
+#if defined(__GNUC__) || defined(__clang__)
+#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
+#else
+#define ALLOW_UNALIGNED
+#endif
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #elif defined(_WIN32)
+ #define CROARING_IS_BIG_ENDIAN 0
+ #else
+ #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+ #include <machine/endian.h>
+ #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
+ #include <sys/byteorder.h>
+ #else // defined(__APPLE__) || defined(__FreeBSD__)
+
+ #ifdef __has_include
+ #if __has_include(<endian.h>)
+ #include <endian.h>
+ #endif //__has_include(<endian.h>)
+ #endif //__has_include
+
+ #endif // defined(__APPLE__) || defined(__FreeBSD__)
+
+
+  #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+ #define CROARING_IS_BIG_ENDIAN 0
+ #endif
+
+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define CROARING_IS_BIG_ENDIAN 0
+ #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define CROARING_IS_BIG_ENDIAN 1
+ #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#endif
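
CROARING_IS_BIG_ENDIAN is resolved entirely at preprocessing time. A small runtime cross-check, assuming only that the macro defined above is in scope (the probe mirrors the IS_BIG_ENDIAN trick earlier in this header):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint16_t probe = 0x00FF;
    /* on a little-endian machine the 0xFF byte is stored first */
    const int runtime_big_endian = (*(const uint8_t *)&probe) == 0x00;
    printf("runtime big-endian=%d compile-time=%d\n",
           runtime_big_endian, (int)CROARING_IS_BIG_ENDIAN);
    return runtime_big_endian == (int)CROARING_IS_BIG_ENDIAN ? 0 : 1;
}
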
+
+// We need portability.h to be included first,
+// but we also always want isadetection.h to be
+// included (right after).
+// See https://github.com/RoaringBitmap/CRoaring/issues/394
+// There is no scenario where we want portability.h to
+// be included, but not isadetection.h: the latter is a
+// strict requirement.
#endif /* INCLUDE_PORTABILITY_H_ */
/* end file include/roaring/portability.h */
+/* begin file include/roaring/isadetection.h */
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
+America and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ROARING_ISADETECTION_H
+#define ROARING_ISADETECTION_H
+
+// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H).
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+// We need portability.h to be included first, see
+// https://github.com/RoaringBitmap/CRoaring/issues/394
+#if CROARING_REGULAR_VISUAL_STUDIO
+#include <intrin.h>
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+#include <cpuid.h>
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+
+enum croaring_instruction_set {
+ CROARING_DEFAULT = 0x0,
+ CROARING_NEON = 0x1,
+ CROARING_AVX2 = 0x4,
+ CROARING_SSE42 = 0x8,
+ CROARING_PCLMULQDQ = 0x10,
+ CROARING_BMI1 = 0x20,
+ CROARING_BMI2 = 0x40,
+ CROARING_ALTIVEC = 0x80,
+ CROARING_UNINITIALIZED = 0x8000
+};
+
+#if defined(__PPC64__)
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_ALTIVEC;
+//}
+
+#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
+
+#if defined(__ARM_NEON)
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_NEON;
+//}
+
+#else // ARM without NEON
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_DEFAULT;
+//}
+
+#endif
+
+#elif defined(__x86_64__) || defined(_M_AMD64) // x64
+
+
+
+
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
+ uint32_t *edx) {
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+ int cpu_info[4];
+ __cpuid(cpu_info, *eax);
+ *eax = cpu_info[0];
+ *ebx = cpu_info[1];
+ *ecx = cpu_info[2];
+ *edx = cpu_info[3];
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+ uint32_t level = *eax;
+ __get_cpuid(level, eax, ebx, ecx, edx);
+#else
+ uint32_t a = *eax, b, c = *ecx, d;
+ __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
+ *eax = a;
+ *ebx = b;
+ *ecx = c;
+ *edx = d;
+#endif
+}
+
+static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t host_isa = 0x0;
+ // Can be found on Intel ISA Reference for CPUID
+ static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7
+ static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1
+ static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
+ // ECX for EAX=0x7
+ eax = 0x7;
+ ecx = 0x0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx & cpuid_avx2_bit) {
+ host_isa |= CROARING_AVX2;
+ }
+ if (ebx & cpuid_bmi1_bit) {
+ host_isa |= CROARING_BMI1;
+ }
+
+ if (ebx & cpuid_bmi2_bit) {
+ host_isa |= CROARING_BMI2;
+ }
+
+ // EBX for EAX=0x1
+ eax = 0x1;
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (ecx & cpuid_sse42_bit) {
+ host_isa |= CROARING_SSE42;
+ }
+
+ if (ecx & cpuid_pclmulqdq_bit) {
+ host_isa |= CROARING_PCLMULQDQ;
+ }
+
+ return host_isa;
+}
+#else // fallback
+
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_DEFAULT;
+//}
+
+
+#endif // end SIMD extension detection code
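
On x64 the detection routine above returns a bitmask assembled from the croaring_instruction_set flags. A usage sketch, assuming that enum is in scope; print_detected_isa is a name invented for this example:

#include <stdint.h>
#include <stdio.h>

static void print_detected_isa(uint32_t isa) {
    printf("SSE4.2    %s\n", (isa & CROARING_SSE42)     ? "yes" : "no");
    printf("AVX2      %s\n", (isa & CROARING_AVX2)      ? "yes" : "no");
    printf("BMI1      %s\n", (isa & CROARING_BMI1)      ? "yes" : "no");
    printf("BMI2      %s\n", (isa & CROARING_BMI2)      ? "yes" : "no");
    printf("PCLMULQDQ %s\n", (isa & CROARING_PCLMULQDQ) ? "yes" : "no");
}

/* e.g.: print_detected_isa(dynamic_croaring_detect_supported_architectures()); */
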
+
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#if defined(__cplusplus)
+static inline uint32_t croaring_detect_supported_architectures() {
+ // thread-safe as per the C++11 standard.
+ static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
+ return buffer;
+}
+#elif CROARING_VISUAL_STUDIO
+// Visual Studio does not support C11 atomics.
+static inline uint32_t croaring_detect_supported_architectures() {
+ static int buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#else // CROARING_VISUAL_STUDIO
+#include <stdatomic.h>
+static inline uint32_t croaring_detect_supported_architectures() {
+ // we use an atomic for thread safety
+ static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {
+ // atomicity is sufficient
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+#ifdef ROARING_DISABLE_AVX
+static inline bool croaring_avx2() {
+ return false;
+}
+#elif defined(__AVX2__)
+static inline bool croaring_avx2() {
+ return true;
+}
+#else
+static inline bool croaring_avx2() {
+ return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
+}
+#endif
+
+
+#else // defined(__x86_64__) || defined(_M_AMD64) // x64
+
+//static inline bool croaring_avx2() {
+// return false;
+//}
+
+//static inline uint32_t croaring_detect_supported_architectures() {
+// // no runtime dispatch
+// return dynamic_croaring_detect_supported_architectures();
+//}
+#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#endif // ROARING_ISADETECTION_H
+/* end file include/roaring/isadetection.h */
/* begin file include/roaring/containers/perfparameters.h */
#ifndef PERFPARAMETERS_H_
#define PERFPARAMETERS_H_
@@ -682,10 +796,10 @@ typedef ROARING_CONTAINER_T container_t;
* downcast; only a static_cast<> is needed. Define a macro for static casting
* which helps make casts more visible, and catches problems at compile-time
* when building the C sources in C++ mode:
- *
+ *
* void some_func(container_t **c, ...) { // double pointer, not single
* array_container_t *ac1 = (array_container_t *)(c); // uncaught!!
- *
+ *
* array_container_t *ac2 = CAST(array_container_t *, c) // C++ errors
* array_container_t *ac3 = CAST_array(c); // shorthand for #2, errors
* }
@@ -694,7 +808,7 @@ typedef ROARING_CONTAINER_T container_t;
* needs a reinterpret_cast<>, which sacrifices safety...so a template is used
* leveraging <type_traits> to make sure it's legal in the C++ build.
*/
-#ifdef __cplusplus
+#ifdef __cplusplus
#define CAST(type,value) static_cast<type>(value)
#define movable_CAST(type,value) movable_CAST_HELPER<type>(value)
@@ -744,7 +858,7 @@ extern "C" { namespace roaring { namespace internal {
 * if ( x<0 ) then inserting ikey at position -x-1 in the array (ensuring that array[-x-1]=ikey)
 * keeps the array sorted.
*/
-static inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
+inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
uint16_t ikey) {
int32_t low = 0;
int32_t high = lenarray - 1;
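
The return convention documented above (a negative result encodes the insertion point as -x-1) is easy to see with a standalone sketch; binary_search_u16 is a local reimplementation of the same contract, not the library's binarySearch:

#include <assert.h>
#include <stdint.h>

static int32_t binary_search_u16(const uint16_t *a, int32_t n, uint16_t key) {
    int32_t low = 0, high = n - 1;
    while (low <= high) {
        const int32_t mid = (low + high) >> 1;
        if (a[mid] < key)      low = mid + 1;
        else if (a[mid] > key) high = mid - 1;
        else return mid;                 /* found: plain index */
    }
    return -(low + 1);                   /* not found: -(insertion point) - 1 */
}

int main(void) {
    const uint16_t a[] = {2, 5, 9};
    assert(binary_search_u16(a, 3, 5) == 1);   /* present at index 1 */
    const int32_t r = binary_search_u16(a, 3, 7);
    assert(r == -3);                           /* absent */
    assert(-r - 1 == 2);                       /* inserting 7 at index 2 keeps the array sorted */
    return 0;
}
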
@@ -847,121 +961,121 @@ static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
* C should have capacity greater than the minimum of s_1 and s_b + 8
* where 8 is sizeof(__m128i)/sizeof(uint16_t).
*/
-static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
+int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
const uint16_t *__restrict__ B, size_t s_b,
uint16_t *C);
/**
* Compute the cardinality of the intersection using SSE4 instructions
*/
-static int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
+int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
size_t s_a,
const uint16_t *__restrict__ B,
size_t s_b);
/* Computes the intersection between one small and one large set of uint16_t.
* Stores the result into buffer and return the number of elements. */
-static int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s,
+int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s,
const uint16_t *largearray, size_t size_l,
uint16_t *buffer);
/* Computes the size of the intersection between one small and one large set of
* uint16_t. */
-static int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
size_t size_s,
const uint16_t *largearray,
size_t size_l);
/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */
-static bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
+bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
const uint16_t *largearray, size_t size_l);
/**
* Generic intersection function.
*/
-static int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
+int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB, uint16_t *out);
/**
* Compute the size of the intersection (generic).
*/
-static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
+int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB);
/**
* Checking whether the size of the intersection is non-zero.
*/
-static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
+bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB);
/**
* Generic union function.
*/
-static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
size_t size_2, uint16_t *buffer);
/**
* Generic XOR function.
*/
-static int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
+int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
const uint16_t *array_2, int32_t card_2, uint16_t *out);
/**
* Generic difference function (ANDNOT).
*/
-static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
+int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
int length2, uint16_t *a_out);
/**
* Generic intersection function.
*/
-static size_t intersection_uint32(const uint32_t *A, const size_t lenA,
+size_t intersection_uint32(const uint32_t *A, const size_t lenA,
const uint32_t *B, const size_t lenB, uint32_t *out);
/**
* Generic intersection function, returns just the cardinality.
*/
-static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
+size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
const uint32_t *B, const size_t lenB);
/**
* Generic union function.
*/
-static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
+size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
size_t size_2, uint32_t *buffer);
/**
* A fast SSE-based union function.
*/
-static uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
+uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
const uint16_t *__restrict__ set_2, uint32_t size_2,
uint16_t *__restrict__ buffer);
/**
* A fast SSE-based XOR function.
*/
-static uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
const uint16_t *__restrict__ array2, uint32_t length2,
uint16_t *__restrict__ output);
/**
* A fast SSE-based difference function.
*/
-static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
+int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
const uint16_t *__restrict__ B, size_t s_b,
uint16_t *C);
/**
* Generic union function, returns just the cardinality.
*/
-static size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
+size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
const uint32_t *set_2, size_t size_2);
/**
* combines union_uint16 and union_vector16 optimally
*/
-static size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
size_t size_2, uint16_t *buffer);
-static bool memequals(const void *s1, const void *s2, size_t n);
+bool memequals(const void *s1, const void *s2, size_t n);
#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
@@ -983,7 +1097,7 @@ static bool memequals(const void *s1, const void *s2, size_t n);
extern "C" { namespace roaring {
#endif
-#if defined(ROARING_INLINE_ASM)
+#if defined(CROARING_INLINE_ASM)
#define CROARING_ASMBITMANIPOPTIMIZATION // optimization flag
#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \
@@ -1073,7 +1187,7 @@ static inline void bitset_set_range(uint64_t *words, uint32_t start,
return;
}
words[firstword] |= (~UINT64_C(0)) << (start % 64);
- uint32_t i; for (i = firstword + 1; i < endword; i++) {
+ for (uint32_t i = firstword + 1; i < endword; i++) {
words[i] = ~UINT64_C(0);
}
words[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64);
@@ -1094,7 +1208,7 @@ static inline int bitset_lenrange_cardinality(const uint64_t *words,
<< (start % 64));
}
int answer = hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
- uint32_t i; for (i = firstword + 1; i < endword; i++) {
+ for (uint32_t i = firstword + 1; i < endword; i++) {
answer += hamming(words[i]);
}
answer +=
@@ -1117,7 +1231,7 @@ static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start,
if (((words[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) {
return false;
}
- uint32_t i; for (i = firstword + 1; i < endword; i++) {
+ for (uint32_t i = firstword + 1; i < endword; i++) {
if (words[i] != 0) {
return false;
}
@@ -1143,7 +1257,7 @@ static inline void bitset_set_lenrange(uint64_t *words, uint32_t start,
}
uint64_t temp = words[endword];
words[firstword] |= (~UINT64_C(0)) << (start % 64);
- uint32_t i; for (i = firstword + 1; i < endword; i += 2)
+ for (uint32_t i = firstword + 1; i < endword; i += 2)
words[i] = words[i + 1] = ~UINT64_C(0);
words[endword] =
temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
@@ -1158,7 +1272,7 @@ static inline void bitset_flip_range(uint64_t *words, uint32_t start,
uint32_t firstword = start / 64;
uint32_t endword = (end - 1) / 64;
words[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
- uint32_t i; for (i = firstword; i < endword; i++) {
+ for (uint32_t i = firstword; i < endword; i++) {
words[i] = ~words[i];
}
words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
@@ -1178,7 +1292,7 @@ static inline void bitset_reset_range(uint64_t *words, uint32_t start,
return;
}
words[firstword] &= ~((~UINT64_C(0)) << (start % 64));
- uint32_t i; for (i = firstword + 1; i < endword; i++) {
+ for (uint32_t i = firstword + 1; i < endword; i++) {
words[i] = UINT64_C(0);
}
words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64));
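
The word and mask arithmetic shared by these range helpers (firstword, endword, the shifted all-ones masks) can be checked in isolation. A standalone sketch that sets the bits in [start, end) the same way and compares against a naive loop; every name here is local to the example:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void set_range_masked(uint64_t *w, uint32_t start, uint32_t end) {
    if (start == end) return;
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    if (firstword == endword) {                        /* range falls inside one word */
        w[firstword] |= ((~UINT64_C(0)) << (start % 64)) &
                        ((~UINT64_C(0)) >> ((~end + 1) % 64));
        return;
    }
    w[firstword] |= (~UINT64_C(0)) << (start % 64);    /* upper bits of the first word */
    for (uint32_t i = firstword + 1; i < endword; i++) w[i] = ~UINT64_C(0);
    w[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); /* bits below end in the last word */
}

int main(void) {
    uint64_t a[16] = {0}, b[16] = {0};                 /* 1024 bits each */
    const uint32_t start = 70, end = 300;
    set_range_masked(a, start, end);
    for (uint32_t i = start; i < end; i++) b[i / 64] |= UINT64_C(1) << (i % 64);
    assert(memcmp(a, b, sizeof(a)) == 0);
    return 0;
}
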
@@ -1199,7 +1313,7 @@ static inline void bitset_reset_range(uint64_t *words, uint32_t start,
*
* This function uses AVX2 decoding.
*/
-static size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
+size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
uint32_t *out, size_t outcapacity,
uint32_t base);
@@ -1212,7 +1326,7 @@ static size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
*
* Returns how many values were actually decoded.
*/
-static size_t bitset_extract_setbits(const uint64_t *words, size_t length,
+size_t bitset_extract_setbits(const uint64_t *words, size_t length,
uint32_t *out, uint32_t base);
/*
@@ -1231,7 +1345,7 @@ static size_t bitset_extract_setbits(const uint64_t *words, size_t length,
*
* This function uses SSE decoding.
*/
-static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
+size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
uint16_t *out, size_t outcapacity,
uint16_t base);
@@ -1245,7 +1359,7 @@ static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t le
*
* Returns how many values were actually decoded.
*/
-static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
+size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
uint16_t *out, uint16_t base);
/*
@@ -1258,7 +1372,7 @@ static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length
*
* Returns how many values were actually decoded.
*/
-static size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
+size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
const uint64_t * __restrict__ words2,
size_t length, uint16_t *out,
uint16_t base);
@@ -1269,13 +1383,13 @@ static size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __rest
* and return the updated cardinality. This evidently assumes that the bitset
* already contained data.
*/
-static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
const uint16_t *list, uint64_t length);
/*
* Given a bitset, set all bit values in the list (there
* are length of them).
*/
-static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length);
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length);
/*
* Given a bitset having cardinality card, unset all bit values in the list
@@ -1283,7 +1397,7 @@ static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t leng
* and return the updated cardinality. This evidently assumes that the bitset
* already contained data.
*/
-static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
uint64_t length);
/*
@@ -1293,10 +1407,10 @@ static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t
* already contained data.
*/
-static uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
+uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
const uint16_t *list, uint64_t length);
-static void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
+void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
#ifdef CROARING_IS_X64
/***
@@ -1354,7 +1468,7 @@ CROARING_TARGET_AVX2
/**
* Fast Harley-Seal AVX population count function
*/
-static inline uint64_t avx2_harley_seal_popcount256(const __m256i *data,
+inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
const uint64_t size) {
__m256i total = _mm256_setzero_si256();
__m256i ones = _mm256_setzero_si256();
@@ -1676,27 +1790,28 @@ typedef struct array_container_s array_container_t;
/* Create a new array with default. Return NULL in case of failure. See also
* array_container_create_given_capacity. */
-static array_container_t *array_container_create(void);
+array_container_t *array_container_create(void);
/* Create a new array with a specified capacity size. Return NULL in case of
* failure. */
-static array_container_t *array_container_create_given_capacity(int32_t size);
+array_container_t *array_container_create_given_capacity(int32_t size);
/* Create a new array containing all values in [min,max). */
-static array_container_t * array_container_create_range(uint32_t min, uint32_t max);
+array_container_t * array_container_create_range(uint32_t min, uint32_t max);
/*
* Shrink the capacity to the actual size, return the number of bytes saved.
*/
-static int array_container_shrink_to_fit(array_container_t *src);
+int array_container_shrink_to_fit(array_container_t *src);
/* Free memory owned by `array'. */
-static void array_container_free(array_container_t *array);
+void array_container_free(array_container_t *array);
/* Duplicate container */
-static array_container_t *array_container_clone(const array_container_t *src);
+array_container_t *array_container_clone(const array_container_t *src);
/* Get the cardinality of `array'. */
+ALLOW_UNALIGNED
static inline int array_container_cardinality(const array_container_t *array) {
return array->cardinality;
}
@@ -1707,18 +1822,14 @@ static inline bool array_container_nonzero_cardinality(
}
/* Copy one container into another. We assume that they are distinct. */
-static void array_container_copy(const array_container_t *src, array_container_t *dst);
+void array_container_copy(const array_container_t *src, array_container_t *dst);
/* Add all the values in [min,max) (included) at a distance k*step from min.
The container must have a size less or equal to DEFAULT_MAX_SIZE after this
addition. */
-static void array_container_add_from_range(array_container_t *arr, uint32_t min,
+void array_container_add_from_range(array_container_t *arr, uint32_t min,
uint32_t max, uint16_t step);
-/* Set the cardinality to zero (does not release memory). */
-static inline void array_container_clear(array_container_t *array) {
- array->cardinality = 0;
-}
static inline bool array_container_empty(const array_container_t *array) {
return array->cardinality == 0;
@@ -1733,35 +1844,35 @@ static inline bool array_container_full(const array_container_t *array) {
/* Compute the union of `src_1' and `src_2' and write the result to `dst'
* It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
-static void array_container_union(const array_container_t *src_1,
+void array_container_union(const array_container_t *src_1,
const array_container_t *src_2,
array_container_t *dst);
/* symmetric difference, see array_container_union */
-static void array_container_xor(const array_container_t *array_1,
+void array_container_xor(const array_container_t *array_1,
const array_container_t *array_2,
array_container_t *out);
/* Computes the intersection of src_1 and src_2 and write the result to
* dst. It is assumed that dst is distinct from both src_1 and src_2. */
-static void array_container_intersection(const array_container_t *src_1,
+void array_container_intersection(const array_container_t *src_1,
const array_container_t *src_2,
array_container_t *dst);
/* Check whether src_1 and src_2 intersect. */
-static bool array_container_intersect(const array_container_t *src_1,
+bool array_container_intersect(const array_container_t *src_1,
const array_container_t *src_2);
 /* computes the size of the intersection between two arrays.
*/
-static int array_container_intersection_cardinality(const array_container_t *src_1,
+int array_container_intersection_cardinality(const array_container_t *src_1,
const array_container_t *src_2);
/* computes the intersection of array1 and array2 and write the result to
* array1.
* */
-static void array_container_intersection_inplace(array_container_t *src_1,
+void array_container_intersection_inplace(array_container_t *src_1,
const array_container_t *src_2);
/*
@@ -1772,22 +1883,22 @@ static void array_container_intersection_inplace(array_container_t *src_1,
* The function returns the number of values written.
* The caller is responsible for allocating enough memory in out.
*/
-static int array_container_to_uint32_array(void *vout, const array_container_t *cont,
+int array_container_to_uint32_array(void *vout, const array_container_t *cont,
uint32_t base);
/* Compute the number of runs */
-static int32_t array_container_number_of_runs(const array_container_t *ac);
+int32_t array_container_number_of_runs(const array_container_t *ac);
/*
* Print this container using printf (useful for debugging).
*/
-static void array_container_printf(const array_container_t *v);
+void array_container_printf(const array_container_t *v);
/*
* Print this container using printf as a comma-separated list of 32-bit
* integers starting at base.
*/
-static void array_container_printf_as_uint32_array(const array_container_t *v,
+void array_container_printf_as_uint32_array(const array_container_t *v,
uint32_t base);
/**
@@ -1803,12 +1914,12 @@ static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
* parameter. If preserve is false, then the new content will be uninitialized,
* otherwise the old content is copied.
*/
-static void array_container_grow(array_container_t *container, int32_t min,
+void array_container_grow(array_container_t *container, int32_t min,
bool preserve);
-static bool array_container_iterate(const array_container_t *cont, uint32_t base,
+bool array_container_iterate(const array_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr);
-static bool array_container_iterate64(const array_container_t *cont, uint32_t base,
+bool array_container_iterate64(const array_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr);
@@ -1820,7 +1931,7 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba
* array_container_size_in_bytes(container).
*
*/
-static int32_t array_container_write(const array_container_t *container, char *buf);
+int32_t array_container_write(const array_container_t *container, char *buf);
/**
* Reads the instance from buf, outputs how many bytes were read.
* This is meant to be byte-by-byte compatible with the Java and Go versions of
@@ -1828,7 +1939,7 @@ static int32_t array_container_write(const array_container_t *container, char *b
* The number of bytes read should be array_container_size_in_bytes(container).
* You need to provide the (known) cardinality.
*/
-static int32_t array_container_read(int32_t cardinality, array_container_t *container,
+int32_t array_container_read(int32_t cardinality, array_container_t *container,
const char *buf);
/**
@@ -1847,6 +1958,7 @@ static inline int32_t array_container_size_in_bytes(
/**
* Return true if the two arrays have the same content.
*/
+ALLOW_UNALIGNED
static inline bool array_container_equals(
const array_container_t *container1,
const array_container_t *container2) {
@@ -1860,7 +1972,7 @@ static inline bool array_container_equals(
/**
* Return true if container1 is a subset of container2.
*/
-static bool array_container_is_subset(const array_container_t *container1,
+bool array_container_is_subset(const array_container_t *container1,
const array_container_t *container2);
/**
@@ -1886,7 +1998,7 @@ static inline bool array_container_select(const array_container_t *container,
* to array out.
* Array out does not need to be distinct from array_1
*/
-static void array_container_andnot(const array_container_t *array_1,
+void array_container_andnot(const array_container_t *array_1,
const array_container_t *array_2,
array_container_t *out);
@@ -1960,7 +2072,7 @@ static inline bool array_container_remove(array_container_t *arr,
}
/* Check whether x is present. */
-static inline bool array_container_contains(const array_container_t *arr,
+inline bool array_container_contains(const array_container_t *arr,
uint16_t pos) {
// return binarySearch(arr->array, arr->cardinality, pos) >= 0;
// binary search with fallback to linear search for short ranges
@@ -1980,8 +2092,7 @@ static inline bool array_container_contains(const array_container_t *arr,
}
}
- int i;
- for (i=low; i <= high; i++) {
+ for (int i=low; i <= high; i++) {
uint16_t v = carr[i];
if (v == pos) {
return true;
@@ -1992,37 +2103,47 @@ static inline bool array_container_contains(const array_container_t *arr,
}
+void array_container_offset(const array_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
+
 /* Check whether a range of values from range_start (included) to range_end (excluded) is present. */
static inline bool array_container_contains_range(const array_container_t *arr,
uint32_t range_start, uint32_t range_end) {
-
+ const int32_t range_count = range_end - range_start;
const uint16_t rs_included = range_start;
const uint16_t re_included = range_end - 1;
- const uint16_t *carr = (const uint16_t *) arr->array;
-
- const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included);
- const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included);
+ // Empty range is always included
+ if (range_count <= 0) {
+ return true;
+ }
+ if (range_count > arr->cardinality) {
+ return false;
+ }
- return (start < arr->cardinality) && (end < arr->cardinality)
- && (((uint16_t)(end - start)) == re_included - rs_included)
- && (carr[start] == rs_included) && (carr[end] == re_included);
+ const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included);
+ // If this sorted array contains all items in the range:
+ // * the start item must be found
+ // * the last item in range range_count must exist, and be the expected end value
+ return (start >= 0) && (arr->cardinality >= start + range_count) &&
+ (arr->array[start + range_count - 1] == re_included);
}
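
A quick worked example of the containment test above, using plain numbers instead of the container type (the linear scan stands in for the binary search; contains_range and arr are invented for this sketch):

#include <assert.h>
#include <stdint.h>

static const uint16_t arr[] = {3, 4, 5, 6, 10};       /* sorted, distinct values */
static const int32_t card = 5;

static int contains_range(uint32_t range_start, uint32_t range_end) {
    const int32_t range_count = (int32_t)(range_end - range_start);
    if (range_count <= 0) return 1;                    /* empty range is always included */
    if (range_count > card) return 0;
    int32_t start = -1;                                /* index of range_start, if present */
    for (int32_t i = 0; i < card; i++)
        if (arr[i] == (uint16_t)range_start) { start = i; break; }
    /* a contained range occupies consecutive slots, so the entry
       range_count - 1 positions after range_start must be range_end - 1 */
    return start >= 0 && card >= start + range_count &&
           arr[start + range_count - 1] == (uint16_t)(range_end - 1);
}

int main(void) {
    assert(contains_range(4, 7));    /* 4, 5, 6 sit in consecutive slots */
    assert(!contains_range(5, 8));   /* 7 is missing: two slots after 5 we find 10 */
    assert(!contains_range(7, 9));   /* 7 itself is absent */
    return 0;
}
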
/* Returns the smallest value (assumes not empty) */
-static inline uint16_t array_container_minimum(const array_container_t *arr) {
+inline uint16_t array_container_minimum(const array_container_t *arr) {
if (arr->cardinality == 0) return 0;
return arr->array[0];
}
/* Returns the largest value (assumes not empty) */
-static inline uint16_t array_container_maximum(const array_container_t *arr) {
+inline uint16_t array_container_maximum(const array_container_t *arr) {
if (arr->cardinality == 0) return 0;
return arr->array[arr->cardinality - 1];
}
/* Returns the number of values equal or smaller than x */
-static inline int array_container_rank(const array_container_t *arr, uint16_t x) {
+inline int array_container_rank(const array_container_t *arr, uint16_t x) {
const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
const bool is_present = idx >= 0;
if (is_present) {
@@ -2033,7 +2154,7 @@ static inline int array_container_rank(const array_container_t *arr, uint16_t x)
}
/* Returns the index of the first value equal or smaller than x, or -1 */
-static inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
+inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
const bool is_present = idx >= 0;
if (is_present) {
@@ -2061,21 +2182,22 @@ static inline void array_container_add_range_nvals(array_container_t *array,
memmove(&(array->array[union_cardinality - nvals_greater]),
&(array->array[array->cardinality - nvals_greater]),
nvals_greater * sizeof(uint16_t));
- uint32_t i; for (i = 0; i <= max - min; i++) {
+ for (uint32_t i = 0; i <= max - min; i++) {
array->array[nvals_less + i] = min + i;
}
array->cardinality = union_cardinality;
}
/**
- * Adds all values in range [min,max].
+ * Adds all values in range [min,max]. This function is currently unused
+ * and is left as documentation.
*/
-static inline void array_container_add_range(array_container_t *array,
+/*static inline void array_container_add_range(array_container_t *array,
uint32_t min, uint32_t max) {
int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
-}
+}*/
/*
* Removes all elements array[pos] .. array[pos+count-1]
@@ -2138,24 +2260,24 @@ typedef struct bitset_container_s bitset_container_t;
#define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c)
/* Create a new bitset. Return NULL in case of failure. */
-static bitset_container_t *bitset_container_create(void);
+bitset_container_t *bitset_container_create(void);
/* Free memory. */
-static void bitset_container_free(bitset_container_t *bitset);
+void bitset_container_free(bitset_container_t *bitset);
/* Clear bitset (sets bits to 0). */
-static void bitset_container_clear(bitset_container_t *bitset);
+void bitset_container_clear(bitset_container_t *bitset);
/* Set all bits to 1. */
-static void bitset_container_set_all(bitset_container_t *bitset);
+void bitset_container_set_all(bitset_container_t *bitset);
/* Duplicate bitset */
-static bitset_container_t *bitset_container_clone(const bitset_container_t *src);
+bitset_container_t *bitset_container_clone(const bitset_container_t *src);
/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow
* and
* should not be used in performance-sensitive code. Ever. */
-static void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
+void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
uint32_t end);
#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(__AVX2__)
@@ -2171,8 +2293,8 @@ static inline void bitset_container_set(bitset_container_t *bitset,
bitset->words[offset] = load;
}
-/* Unset the ith bit. */
-static inline void bitset_container_unset(bitset_container_t *bitset,
+/* Unset the ith bit. Currently unused. Could be used for optimization. */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
uint16_t pos) {
uint64_t shift = 6;
uint64_t offset;
@@ -2181,7 +2303,7 @@ static inline void bitset_container_unset(bitset_container_t *bitset,
uint64_t load = bitset->words[offset];
ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
bitset->words[offset] = load;
-}
+}*/
/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
* than bitset_container_set. */
@@ -2216,7 +2338,7 @@ static inline bool bitset_container_remove(bitset_container_t *bitset,
}
/* Get the value of the ith bit. */
-static inline bool bitset_container_get(const bitset_container_t *bitset,
+inline bool bitset_container_get(const bitset_container_t *bitset,
uint16_t pos) {
uint64_t word = bitset->words[pos >> 6];
const uint64_t p = pos;
@@ -2236,15 +2358,15 @@ static inline void bitset_container_set(bitset_container_t *bitset,
bitset->words[pos >> 6] = new_word;
}
-/* Unset the ith bit. */
-static inline void bitset_container_unset(bitset_container_t *bitset,
+/* Unset the ith bit. Currently unused. */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
uint16_t pos) {
const uint64_t old_word = bitset->words[pos >> 6];
const int index = pos & 63;
const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
bitset->words[pos >> 6] = new_word;
-}
+}*/
/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
* than bitset_container_set. */
@@ -2273,7 +2395,7 @@ static inline bool bitset_container_remove(bitset_container_t *bitset,
}
/* Get the value of the ith bit. */
-static inline bool bitset_container_get(const bitset_container_t *bitset,
+inline bool bitset_container_get(const bitset_container_t *bitset,
uint16_t pos) {
const uint64_t word = bitset->words[pos >> 6];
return (word >> (pos & 63)) & 1;
@@ -2302,7 +2424,7 @@ static inline bool bitset_container_get_range(const bitset_container_t *bitset,
return false;
}
- uint32_t i; for (i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){
+ for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){
if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
}
@@ -2311,7 +2433,7 @@ static inline bool bitset_container_get_range(const bitset_container_t *bitset,
}
/* Check whether `bitset' is present in `array'. Calls bitset_container_get. */
-static inline bool bitset_container_contains(const bitset_container_t *bitset,
+inline bool bitset_container_contains(const bitset_container_t *bitset,
uint16_t pos) {
return bitset_container_get(bitset, pos);
}
@@ -2326,43 +2448,35 @@ static inline bool bitset_container_contains_range(const bitset_container_t *bit
}
/* Get the number of bits set */
+ALLOW_UNALIGNED
static inline int bitset_container_cardinality(
const bitset_container_t *bitset) {
return bitset->cardinality;
}
+
+
+
/* Copy one container into another. We assume that they are distinct. */
-static void bitset_container_copy(const bitset_container_t *source,
+void bitset_container_copy(const bitset_container_t *source,
bitset_container_t *dest);
/* Add all the values [min,max) at a distance k*step from min: min,
* min+step,.... */
-static void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
+void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
uint32_t max, uint16_t step);
/* Get the number of bits set (force computation). This does not modify bitset.
* To update the cardinality, you should do
* bitset->cardinality = bitset_container_compute_cardinality(bitset).*/
-static int bitset_container_compute_cardinality(const bitset_container_t *bitset);
+int bitset_container_compute_cardinality(const bitset_container_t *bitset);
-/* Get whether there is at least one bit set (see bitset_container_empty for the reverse),
- when the cardinality is unknown, it is computed and stored in the struct */
-static inline bool bitset_container_nonzero_cardinality(
- bitset_container_t *bitset) {
- // account for laziness
- if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
- // could bail early instead with a nonzero result
- bitset->cardinality = bitset_container_compute_cardinality(bitset);
- }
- return bitset->cardinality > 0;
-}
-
-/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse),
+/* Check whether this bitset is empty,
* it never modifies the bitset struct. */
static inline bool bitset_container_empty(
const bitset_container_t *bitset) {
if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
if((bitset->words[i]) != 0) return false;
}
return true;
@@ -2381,99 +2495,102 @@ static inline bool bitset_container_const_nonzero_cardinality(
/*
* Check whether the two bitsets intersect
*/
-static bool bitset_container_intersect(const bitset_container_t *src_1,
+bool bitset_container_intersect(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
* cardinality. */
-static int bitset_container_or(const bitset_container_t *src_1,
+int bitset_container_or(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality.
*/
-static int bitset_container_or_justcard(const bitset_container_t *src_1,
+int bitset_container_or_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
* cardinality. Same as bitset_container_or. */
-static int bitset_container_union(const bitset_container_t *src_1,
+int bitset_container_union(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the union of bitsets `src_1' and `src_2' and return the
* cardinality. Same as bitset_container_or_justcard. */
-static int bitset_container_union_justcard(const bitset_container_t *src_1,
+int bitset_container_union_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
* update the cardinality. Provided to optimize chained operations. */
-static int bitset_container_or_nocard(const bitset_container_t *src_1,
+int bitset_container_or_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
* return the cardinality. */
-static int bitset_container_and(const bitset_container_t *src_1,
+int bitset_container_and(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the intersection of bitsets `src_1' and `src_2' and return the
* cardinality. */
-static int bitset_container_and_justcard(const bitset_container_t *src_1,
+int bitset_container_and_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
* return the cardinality. Same as bitset_container_and. */
-static int bitset_container_intersection(const bitset_container_t *src_1,
+int bitset_container_intersection(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the intersection of bitsets `src_1' and `src_2' and return the
* cardinality. Same as bitset_container_and_justcard. */
-static int bitset_container_intersection_justcard(const bitset_container_t *src_1,
+int bitset_container_intersection_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
* not update the cardinality. Provided to optimize chained operations. */
-static int bitset_container_and_nocard(const bitset_container_t *src_1,
+int bitset_container_and_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and
* return the cardinality. */
-static int bitset_container_xor(const bitset_container_t *src_1,
+int bitset_container_xor(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the
* cardinality. */
-static int bitset_container_xor_justcard(const bitset_container_t *src_1,
+int bitset_container_xor_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does
* not update the cardinality. Provided to optimize chained operations. */
-static int bitset_container_xor_nocard(const bitset_container_t *src_1,
+int bitset_container_xor_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the
* cardinality. */
-static int bitset_container_andnot(const bitset_container_t *src_1,
+int bitset_container_andnot(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Computes the and not of bitsets `src_1' and `src_2' and return the
* cardinality. */
-static int bitset_container_andnot_justcard(const bitset_container_t *src_1,
+int bitset_container_andnot_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
 /* Computes the and not of bitsets `src_1' and `src_2' into `dst', but does
* not update the cardinality. Provided to optimize chained operations. */
-static int bitset_container_andnot_nocard(const bitset_container_t *src_1,
+int bitset_container_andnot_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
+void bitset_container_offset(const bitset_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
/*
* Write out the 16-bit integers contained in this container as a list of 32-bit
* integers using base
@@ -2484,20 +2601,20 @@ static int bitset_container_andnot_nocard(const bitset_container_t *src_1,
* The out pointer should point to enough memory (the cardinality times 32
* bits).
*/
-static int bitset_container_to_uint32_array(uint32_t *out,
+int bitset_container_to_uint32_array(uint32_t *out,
const bitset_container_t *bc,
uint32_t base);
/*
* Print this container using printf (useful for debugging).
*/
-static void bitset_container_printf(const bitset_container_t *v);
+void bitset_container_printf(const bitset_container_t *v);
/*
* Print this container using printf as a comma-separated list of 32-bit
* integers starting at base.
*/
-static void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
+void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
uint32_t base);
/**
@@ -2510,11 +2627,11 @@ static inline int32_t bitset_container_serialized_size_in_bytes(void) {
/**
 * Return the number of runs.
*/
-static int bitset_container_number_of_runs(bitset_container_t *bc);
+int bitset_container_number_of_runs(bitset_container_t *bc);
-static bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
+bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr);
-static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
+bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr);
@@ -2525,7 +2642,7 @@ static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t
* The number of bytes written should be
* bitset_container_size_in_bytes(container).
*/
-static int32_t bitset_container_write(const bitset_container_t *container, char *buf);
+int32_t bitset_container_write(const bitset_container_t *container, char *buf);
/**
* Reads the instance from buf, outputs how many bytes were read.
@@ -2534,7 +2651,7 @@ static int32_t bitset_container_write(const bitset_container_t *container, char
* The number of bytes read should be bitset_container_size_in_bytes(container).
* You need to provide the (known) cardinality.
*/
-static int32_t bitset_container_read(int32_t cardinality,
+int32_t bitset_container_read(int32_t cardinality,
bitset_container_t *container, const char *buf);
/**
* Return the serialized size in bytes of a container (see
@@ -2552,13 +2669,13 @@ static inline int32_t bitset_container_size_in_bytes(
/**
* Return true if the two containers have the same content.
*/
-static bool bitset_container_equals(const bitset_container_t *container1,
+bool bitset_container_equals(const bitset_container_t *container1,
const bitset_container_t *container2);
/**
* Return true if container1 is a subset of container2.
*/
-static bool bitset_container_is_subset(const bitset_container_t *container1,
+bool bitset_container_is_subset(const bitset_container_t *container1,
const bitset_container_t *container2);
/**
@@ -2567,21 +2684,21 @@ static bool bitset_container_is_subset(const bitset_container_t *container1,
* accordingly.
 * Otherwise, it returns false and updates start_rank.
*/
-static bool bitset_container_select(const bitset_container_t *container,
+bool bitset_container_select(const bitset_container_t *container,
uint32_t *start_rank, uint32_t rank,
uint32_t *element);
/* Returns the smallest value (assumes not empty) */
-static uint16_t bitset_container_minimum(const bitset_container_t *container);
+uint16_t bitset_container_minimum(const bitset_container_t *container);
/* Returns the largest value (assumes not empty) */
-static uint16_t bitset_container_maximum(const bitset_container_t *container);
+uint16_t bitset_container_maximum(const bitset_container_t *container);
/* Returns the number of values equal or smaller than x */
-static int bitset_container_rank(const bitset_container_t *container, uint16_t x);
+int bitset_container_rank(const bitset_container_t *container, uint16_t x);
/* Returns the index of the first value equal or larger than x, or -1 */
-static int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
+int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
@@ -2657,22 +2774,22 @@ typedef struct run_container_s run_container_t;
#define movable_CAST_run(c) movable_CAST(run_container_t **, c)
/* Create a new run container. Return NULL in case of failure. */
-static run_container_t *run_container_create(void);
+run_container_t *run_container_create(void);
/* Create a new run container with given capacity. Return NULL in case of
* failure. */
-static run_container_t *run_container_create_given_capacity(int32_t size);
+run_container_t *run_container_create_given_capacity(int32_t size);
/*
* Shrink the capacity to the actual size, return the number of bytes saved.
*/
-static int run_container_shrink_to_fit(run_container_t *src);
+int run_container_shrink_to_fit(run_container_t *src);
/* Free memory owned by `run'. */
-static void run_container_free(run_container_t *run);
+void run_container_free(run_container_t *run);
/* Duplicate container */
-static run_container_t *run_container_clone(const run_container_t *src);
+run_container_t *run_container_clone(const run_container_t *src);
/*
* Effectively deletes the value at index index, repacking data.
@@ -2686,7 +2803,7 @@ static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) {
/**
* Good old binary search through rle data
*/
-static inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray,
+inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray,
uint16_t ikey) {
int32_t low = 0;
int32_t high = lenarray - 1;
@@ -2777,7 +2894,7 @@ static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray
* existing data needs to be copied over depends on copy. If "copy" is false,
* then the new content will be uninitialized, otherwise a copy is made.
*/
-static void run_container_grow(run_container_t *run, int32_t min, bool copy);
+void run_container_grow(run_container_t *run, int32_t min, bool copy);
/**
* Moves the data so that we can write data at index
@@ -2794,7 +2911,7 @@ static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) {
}
/* Add `pos' to `run'. Returns true if `pos' was not present. */
-static bool run_container_add(run_container_t *run, uint16_t pos);
+bool run_container_add(run_container_t *run, uint16_t pos);
/* Remove `pos' from `run'. Returns true if `pos' was present. */
static inline bool run_container_remove(run_container_t *run, uint16_t pos) {
@@ -2834,7 +2951,7 @@ static inline bool run_container_remove(run_container_t *run, uint16_t pos) {
}
/* Check whether `pos' is present in `run'. */
-static inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
+inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
if (index >= 0) return true;
index = -index - 2; // points to preceding value, possibly -1
@@ -2860,7 +2977,7 @@ static inline bool run_container_contains_range(const run_container_t *run,
return false;
}
}
- int32_t i; for (i = index; i < run->n_runs; ++i) {
+ for (int32_t i = index; i < run->n_runs; ++i) {
const uint32_t stop = run->runs[i].value + run->runs[i].length;
if (run->runs[i].value >= pos_end) break;
if (stop >= pos_end) {
@@ -2874,7 +2991,7 @@ static inline bool run_container_contains_range(const run_container_t *run,
}
/* Get the cardinality of `run'. Requires an actual computation. */
-static int run_container_cardinality(const run_container_t *run);
+int run_container_cardinality(const run_container_t *run);
/* Card > 0?, see run_container_empty for the reverse */
static inline bool run_container_nonzero_cardinality(
@@ -2891,12 +3008,7 @@ static inline bool run_container_empty(
/* Copy one container into another. We assume that they are distinct. */
-static void run_container_copy(const run_container_t *src, run_container_t *dst);
-
-/* Set the cardinality to zero (does not release memory). */
-static inline void run_container_clear(run_container_t *run) {
- run->n_runs = 0;
-}
+void run_container_copy(const run_container_t *src, run_container_t *dst);
/**
* Append run described by vl to the run container, possibly merging.
@@ -2978,31 +3090,31 @@ static inline bool run_container_is_full(const run_container_t *run) {
/* Compute the union of `src_1' and `src_2' and write the result to `dst'
* It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
-static void run_container_union(const run_container_t *src_1,
+void run_container_union(const run_container_t *src_1,
const run_container_t *src_2, run_container_t *dst);
/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */
-static void run_container_union_inplace(run_container_t *src_1,
+void run_container_union_inplace(run_container_t *src_1,
const run_container_t *src_2);
/* Compute the intersection of src_1 and src_2 and write the result to
* dst. It is assumed that dst is distinct from both src_1 and src_2. */
-static void run_container_intersection(const run_container_t *src_1,
+void run_container_intersection(const run_container_t *src_1,
const run_container_t *src_2,
run_container_t *dst);
/* Compute the size of the intersection of src_1 and src_2 . */
-static int run_container_intersection_cardinality(const run_container_t *src_1,
+int run_container_intersection_cardinality(const run_container_t *src_1,
const run_container_t *src_2);
/* Check whether src_1 and src_2 intersect. */
-static bool run_container_intersect(const run_container_t *src_1,
+bool run_container_intersect(const run_container_t *src_1,
const run_container_t *src_2);
/* Compute the symmetric difference of `src_1' and `src_2' and write the result
* to `dst'
* It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
-static void run_container_xor(const run_container_t *src_1,
+void run_container_xor(const run_container_t *src_1,
const run_container_t *src_2, run_container_t *dst);
/*
@@ -3013,19 +3125,19 @@ static void run_container_xor(const run_container_t *src_1,
* The function returns the number of values written.
* The caller is responsible for allocating enough memory in out.
*/
-static int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
uint32_t base);
/*
* Print this container using printf (useful for debugging).
*/
-static void run_container_printf(const run_container_t *v);
+void run_container_printf(const run_container_t *v);
/*
* Print this container using printf as a comma-separated list of 32-bit
* integers starting at base.
*/
-static void run_container_printf_as_uint32_array(const run_container_t *v,
+void run_container_printf_as_uint32_array(const run_container_t *v,
uint32_t base);
/**
@@ -3036,9 +3148,9 @@ static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries.
}
-static bool run_container_iterate(const run_container_t *cont, uint32_t base,
+bool run_container_iterate(const run_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr);
-static bool run_container_iterate64(const run_container_t *cont, uint32_t base,
+bool run_container_iterate64(const run_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr);
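
As a worked example of the serialized-size formula a few lines above: each rle16_t is a (value, length) pair of two 16-bit fields, so a run container with 100 runs spends 4 * 100 = 400 bytes on its runs, while one with roughly 2048 runs reaches the 8 KiB footprint of a full bitset container; run containers therefore only pay off while the run count stays well below that break-even point.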
@@ -3048,7 +3160,7 @@ static bool run_container_iterate64(const run_container_t *cont, uint32_t base,
* Roaring.
* The number of bytes written should be run_container_size_in_bytes(container).
*/
-static int32_t run_container_write(const run_container_t *container, char *buf);
+int32_t run_container_write(const run_container_t *container, char *buf);
/**
* Reads the instance from buf, outputs how many bytes were read.
@@ -3059,7 +3171,7 @@ static int32_t run_container_write(const run_container_t *container, char *buf);
* but
 * it might be effectively ignored.
*/
-static int32_t run_container_read(int32_t cardinality, run_container_t *container,
+int32_t run_container_read(int32_t cardinality, run_container_t *container,
const char *buf);
/**
@@ -3074,6 +3186,7 @@ static inline int32_t run_container_size_in_bytes(
/**
* Return true if the two containers have the same content.
*/
+ALLOW_UNALIGNED
static inline bool run_container_equals(const run_container_t *container1,
const run_container_t *container2) {
if (container1->n_runs != container2->n_runs) {
@@ -3086,14 +3199,14 @@ static inline bool run_container_equals(const run_container_t *container1,
/**
* Return true if container1 is a subset of container2.
*/
-static bool run_container_is_subset(const run_container_t *container1,
+bool run_container_is_subset(const run_container_t *container1,
const run_container_t *container2);
/**
* Used in a start-finish scan that appends segments, for XOR and NOT
*/
-static void run_container_smart_append_exclusive(run_container_t *src,
+void run_container_smart_append_exclusive(run_container_t *src,
const uint16_t start,
const uint16_t length);
@@ -3122,33 +3235,37 @@ static inline run_container_t *run_container_create_range(uint32_t start,
* accordingly.
* Otherwise, it returns false and update start_rank.
*/
-static bool run_container_select(const run_container_t *container,
+bool run_container_select(const run_container_t *container,
uint32_t *start_rank, uint32_t rank,
uint32_t *element);
/* Compute the difference of src_1 and src_2 and write the result to
* dst. It is assumed that dst is distinct from both src_1 and src_2. */
-static void run_container_andnot(const run_container_t *src_1,
+void run_container_andnot(const run_container_t *src_1,
const run_container_t *src_2, run_container_t *dst);
+void run_container_offset(const run_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
+
/* Returns the smallest value (assumes not empty) */
-static inline uint16_t run_container_minimum(const run_container_t *run) {
+inline uint16_t run_container_minimum(const run_container_t *run) {
if (run->n_runs == 0) return 0;
return run->runs[0].value;
}
/* Returns the largest value (assumes not empty) */
-static inline uint16_t run_container_maximum(const run_container_t *run) {
+inline uint16_t run_container_maximum(const run_container_t *run) {
if (run->n_runs == 0) return 0;
return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length;
}
/* Returns the number of values equal or smaller than x */
-static int run_container_rank(const run_container_t *arr, uint16_t x);
+int run_container_rank(const run_container_t *arr, uint16_t x);
/* Returns the index of the first run containing a value at least as large as x, or -1 */
-static inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
+inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
if (index >= 0) return index;
index = -index - 2; // points to preceding run, possibly -1
@@ -3194,14 +3311,15 @@ static inline void run_container_add_range_nruns(run_container_t* run,
}
/**
- * Add all values in range [min, max]
+ * Add all values in range [min, max]. This function is currently unused
+ * and left as documentation.
*/
-static inline void run_container_add_range(run_container_t* run,
+/*static inline void run_container_add_range(run_container_t* run,
uint32_t min, uint32_t max) {
int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
-}
+}*/
/**
* Shifts last $count elements either left (distance < 0) or right (distance > 0)
@@ -3291,31 +3409,31 @@ extern "C" { namespace roaring { namespace internal {
/* Convert an array into a bitset. The input container is not freed or modified.
*/
-static bitset_container_t *bitset_container_from_array(const array_container_t *arr);
+bitset_container_t *bitset_container_from_array(const array_container_t *arr);
/* Convert a run into a bitset. The input container is not freed or modified. */
-static bitset_container_t *bitset_container_from_run(const run_container_t *arr);
+bitset_container_t *bitset_container_from_run(const run_container_t *arr);
/* Convert a run into an array. The input container is not freed or modified. */
-static array_container_t *array_container_from_run(const run_container_t *arr);
+array_container_t *array_container_from_run(const run_container_t *arr);
/* Convert a bitset into an array. The input container is not freed or modified.
*/
-static array_container_t *array_container_from_bitset(const bitset_container_t *bits);
+array_container_t *array_container_from_bitset(const bitset_container_t *bits);
/* Convert an array into a run. The input container is not freed or modified.
*/
-static run_container_t *run_container_from_array(const array_container_t *c);
+run_container_t *run_container_from_array(const array_container_t *c);
/* convert a run into either an array or a bitset
* might free the container. This does not free the input run container. */
-static container_t *convert_to_bitset_or_array_container(
+container_t *convert_to_bitset_or_array_container(
run_container_t *rc, int32_t card,
uint8_t *resulttype);
/* convert containers to and from runcontainers, as is most space efficient.
* The container might be freed. */
-static container_t *convert_run_optimize(
+container_t *convert_run_optimize(
container_t *c, uint8_t typecode_original,
uint8_t *typecode_after);
@@ -3324,18 +3442,18 @@ static container_t *convert_run_optimize(
/* If a conversion occurs, the caller is responsible for freeing the original
 * container and
 * becomes responsible for freeing the new one. */
-static container_t *convert_run_to_efficient_container(
+container_t *convert_run_to_efficient_container(
run_container_t *c, uint8_t *typecode_after);
// like convert_run_to_efficient_container but frees the old result if needed
-static container_t *convert_run_to_efficient_container_and_free(
+container_t *convert_run_to_efficient_container_and_free(
run_container_t *c, uint8_t *typecode_after);
/**
* Create new container which is a union of run container and
* range [min, max]. Caller is responsible for freeing run container.
*/
-static container_t *container_from_run_range(
+container_t *container_from_run_range(
const run_container_t *run,
uint32_t min, uint32_t max,
uint8_t *typecode_after);
@@ -3363,18 +3481,18 @@ extern "C" { namespace roaring { namespace internal {
/**
* Return true if the two containers have the same content.
*/
-static bool array_container_equal_bitset(const array_container_t* container1,
+bool array_container_equal_bitset(const array_container_t* container1,
const bitset_container_t* container2);
/**
* Return true if the two containers have the same content.
*/
-static bool run_container_equals_array(const run_container_t* container1,
+bool run_container_equals_array(const run_container_t* container1,
const array_container_t* container2);
/**
* Return true if the two containers have the same content.
*/
-static bool run_container_equals_bitset(const run_container_t* container1,
+bool run_container_equals_bitset(const run_container_t* container1,
const bitset_container_t* container2);
#ifdef __cplusplus
@@ -3400,31 +3518,31 @@ extern "C" { namespace roaring { namespace internal {
/**
* Return true if container1 is a subset of container2.
*/
-static bool array_container_is_subset_bitset(const array_container_t* container1,
+bool array_container_is_subset_bitset(const array_container_t* container1,
const bitset_container_t* container2);
/**
* Return true if container1 is a subset of container2.
*/
-static bool run_container_is_subset_array(const run_container_t* container1,
+bool run_container_is_subset_array(const run_container_t* container1,
const array_container_t* container2);
/**
* Return true if container1 is a subset of container2.
*/
-static bool array_container_is_subset_run(const array_container_t* container1,
+bool array_container_is_subset_run(const array_container_t* container1,
const run_container_t* container2);
/**
* Return true if container1 is a subset of container2.
*/
-static bool run_container_is_subset_bitset(const run_container_t* container1,
+bool run_container_is_subset_bitset(const run_container_t* container1,
const bitset_container_t* container2);
/**
* Return true if container1 is a subset of container2.
*/
-static bool bitset_container_is_subset_run(const bitset_container_t* container1,
+bool bitset_container_is_subset_run(const bitset_container_t* container1,
const run_container_t* container2);
#ifdef __cplusplus
@@ -3447,14 +3565,14 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the andnot of src_1 and src_2 and write the result to
* dst, a valid array container that could be the same as dst.*/
-static void array_bitset_container_andnot(const array_container_t *src_1,
+void array_bitset_container_andnot(const array_container_t *src_1,
const bitset_container_t *src_2,
array_container_t *dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* src_1 */
-static void array_bitset_container_iandnot(array_container_t *src_1,
+void array_bitset_container_iandnot(array_container_t *src_1,
const bitset_container_t *src_2);
/* Compute the andnot of src_1 and src_2 and write the result to
@@ -3462,7 +3580,7 @@ static void array_bitset_container_iandnot(array_container_t *src_1,
* Return true for a bitset result; false for array
*/
-static bool bitset_array_container_andnot(
+bool bitset_array_container_andnot(
const bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3473,7 +3591,7 @@ static bool bitset_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_array_container_iandnot(
+bool bitset_array_container_iandnot(
bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3484,7 +3602,7 @@ static bool bitset_array_container_iandnot(
* result true) or an array container.
*/
-static bool run_bitset_container_andnot(
+bool run_bitset_container_andnot(
const run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3495,7 +3613,7 @@ static bool run_bitset_container_andnot(
* result true) or an array container.
*/
-static bool run_bitset_container_iandnot(
+bool run_bitset_container_iandnot(
run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3506,7 +3624,7 @@ static bool run_bitset_container_iandnot(
* result true) or an array container.
*/
-static bool bitset_run_container_andnot(
+bool bitset_run_container_andnot(
const bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -3517,7 +3635,7 @@ static bool bitset_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_run_container_iandnot(
+bool bitset_run_container_iandnot(
bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -3525,7 +3643,7 @@ static bool bitset_run_container_iandnot(
* can become any type of container.
*/
-static int run_array_container_andnot(
+int run_array_container_andnot(
const run_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3536,13 +3654,13 @@ static int run_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static int run_array_container_iandnot(
+int run_array_container_iandnot(
run_container_t *src_1, const array_container_t *src_2,
container_t **dst);
/* dst must be a valid array container, allowed to be src_1 */
-static void array_run_container_andnot(const array_container_t *src_1,
+void array_run_container_andnot(const array_container_t *src_1,
const run_container_t *src_2,
array_container_t *dst);
@@ -3550,14 +3668,14 @@ static void array_run_container_andnot(const array_container_t *src_1,
* can become any kind of container.
*/
-static void array_run_container_iandnot(array_container_t *src_1,
+void array_run_container_iandnot(array_container_t *src_1,
const run_container_t *src_2);
/* dst does not indicate a valid container initially. Eventually it
* can become any kind of container.
*/
-static int run_run_container_andnot(
+int run_run_container_andnot(
const run_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -3568,7 +3686,7 @@ static int run_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static int run_run_container_iandnot(
+int run_run_container_iandnot(
run_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -3576,13 +3694,13 @@ static int run_run_container_iandnot(
* dst is a valid array container and may be the same as src_1
*/
-static void array_array_container_andnot(const array_container_t *src_1,
+void array_array_container_andnot(const array_container_t *src_1,
const array_container_t *src_2,
array_container_t *dst);
/* inplace array-array andnot will always be able to reuse the space of
* src_1 */
-static void array_array_container_iandnot(array_container_t *src_1,
+void array_array_container_iandnot(array_container_t *src_1,
const array_container_t *src_2);
/* Compute the andnot of src_1 and src_2 and write the result to
@@ -3590,7 +3708,7 @@ static void array_array_container_iandnot(array_container_t *src_1,
* "dst is a bitset"
*/
-static bool bitset_bitset_container_andnot(
+bool bitset_bitset_container_andnot(
const bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3601,7 +3719,7 @@ static bool bitset_bitset_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_bitset_container_iandnot(
+bool bitset_bitset_container_iandnot(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3633,18 +3751,18 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the intersection of src_1 and src_2 and write the result to
* dst. It is allowed for dst to be equal to src_1. We assume that dst is a
* valid container. */
-static void array_bitset_container_intersection(const array_container_t *src_1,
+void array_bitset_container_intersection(const array_container_t *src_1,
const bitset_container_t *src_2,
array_container_t *dst);
/* Compute the size of the intersection of src_1 and src_2. */
-static int array_bitset_container_intersection_cardinality(
+int array_bitset_container_intersection_cardinality(
const array_container_t *src_1, const bitset_container_t *src_2);
/* Checking whether src_1 and src_2 intersect. */
-static bool array_bitset_container_intersect(const array_container_t *src_1,
+bool array_bitset_container_intersect(const array_container_t *src_1,
const bitset_container_t *src_2);
/*
@@ -3653,14 +3771,14 @@ static bool array_bitset_container_intersect(const array_container_t *src_1,
 * otherwise it is an array_container_t. We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
+bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
const bitset_container_t *src_2,
container_t **dst);
/* Compute the intersection between src_1 and src_2 and write the result to
* dst. It is allowed for dst to be equal to src_1. We assume that dst is a
* valid container. */
-static void array_run_container_intersection(const array_container_t *src_1,
+void array_run_container_intersection(const array_container_t *src_1,
const run_container_t *src_2,
array_container_t *dst);
@@ -3669,27 +3787,27 @@ static void array_run_container_intersection(const array_container_t *src_1,
 * otherwise it is an array_container_t.
* If *dst == src_2, then an in-place intersection is attempted
**/
-static bool run_bitset_container_intersection(const run_container_t *src_1,
+bool run_bitset_container_intersection(const run_container_t *src_1,
const bitset_container_t *src_2,
container_t **dst);
/* Compute the size of the intersection between src_1 and src_2 . */
-static int array_run_container_intersection_cardinality(const array_container_t *src_1,
+int array_run_container_intersection_cardinality(const array_container_t *src_1,
const run_container_t *src_2);
/* Compute the size of the intersection between src_1 and src_2
**/
-static int run_bitset_container_intersection_cardinality(const run_container_t *src_1,
+int run_bitset_container_intersection_cardinality(const run_container_t *src_1,
const bitset_container_t *src_2);
/* Check whether src_1 and src_2 intersect. */
-static bool array_run_container_intersect(const array_container_t *src_1,
+bool array_run_container_intersect(const array_container_t *src_1,
const run_container_t *src_2);
/* Check whether src_1 and src_2 intersect.
**/
-static bool run_bitset_container_intersect(const run_container_t *src_1,
+bool run_bitset_container_intersect(const run_container_t *src_1,
const bitset_container_t *src_2);
/*
@@ -3700,7 +3818,7 @@ static bool run_bitset_container_intersect(const run_container_t *src_1,
* to free the container.
* In all cases, the result is in *dst.
*/
-static bool bitset_bitset_container_intersection_inplace(
+bool bitset_bitset_container_intersection_inplace(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3731,7 +3849,7 @@ extern "C" { namespace roaring { namespace internal {
* We assume that dst is pre-allocated and a valid bitset container
* There can be no in-place version.
*/
-static void array_container_negation(const array_container_t *src,
+void array_container_negation(const array_container_t *src,
bitset_container_t *dst);
/* Negation across the entire range of the container
@@ -3741,7 +3859,7 @@ static void array_container_negation(const array_container_t *src,
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static bool bitset_container_negation(
+bool bitset_container_negation(
const bitset_container_t *src,
container_t **dst);
@@ -3754,7 +3872,7 @@ static bool bitset_container_negation(
* to free the container.
* In all cases, the result is in *dst.
*/
-static bool bitset_container_negation_inplace(
+bool bitset_container_negation_inplace(
bitset_container_t *src,
container_t **dst);
@@ -3765,7 +3883,7 @@ static bool bitset_container_negation_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static int run_container_negation(const run_container_t *src, container_t **dst);
+int run_container_negation(const run_container_t *src, container_t **dst);
/*
* Same as run_container_negation except that if the output is to
@@ -3774,14 +3892,14 @@ static int run_container_negation(const run_container_t *src, container_t **dst)
* then src is modified and no allocation is made.
* In all cases, the result is in *dst.
*/
-static int run_container_negation_inplace(run_container_t *src, container_t **dst);
+int run_container_negation_inplace(run_container_t *src, container_t **dst);
/* Negation across a range of the container.
* Compute the negation of src and write the result
* to *dst. Returns true if the result is a bitset container
* and false for an array container. *dst is not preallocated.
*/
-static bool array_container_negation_range(
+bool array_container_negation_range(
const array_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3790,7 +3908,7 @@ static bool array_container_negation_range(
* inplace version without inefficient copying. Thus this routine
* may be a wrapper for the non-in-place version
*/
-static bool array_container_negation_range_inplace(
+bool array_container_negation_range_inplace(
array_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3802,7 +3920,7 @@ static bool array_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static bool bitset_container_negation_range(
+bool bitset_container_negation_range(
const bitset_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3816,7 +3934,7 @@ static bool bitset_container_negation_range(
* to free the container.
* In all cases, the result is in *dst.
*/
-static bool bitset_container_negation_range_inplace(
+bool bitset_container_negation_range_inplace(
bitset_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3827,7 +3945,7 @@ static bool bitset_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static int run_container_negation_range(
+int run_container_negation_range(
const run_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3839,7 +3957,7 @@ static int run_container_negation_range(
* then src is modified and no allocation is made.
* In all cases, the result is in *dst.
*/
-static int run_container_negation_range_inplace(
+int run_container_negation_range_inplace(
run_container_t *src,
const int range_start, const int range_end,
container_t **dst);
@@ -3871,14 +3989,14 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the union of src_1 and src_2 and write the result to
* dst. It is allowed for src_2 to be dst. */
-static void array_bitset_container_union(const array_container_t *src_1,
+void array_bitset_container_union(const array_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Compute the union of src_1 and src_2 and write the result to
* dst. It is allowed for src_2 to be dst. This version does not
* update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
-static void array_bitset_container_lazy_union(const array_container_t *src_1,
+void array_bitset_container_lazy_union(const array_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
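
The lazy union variants above skip the cardinality bookkeeping (dst is left at BITSET_UNKNOWN_CARDINALITY), which pays off when many unions are chained and the count is only needed at the end. A minimal sketch of that pattern through the public API; the wrappers roaring_bitmap_lazy_or_inplace() and roaring_bitmap_repair_after_lazy() are assumed to be exposed by this bundled roaring.h as in upstream CRoaring, and union_all() is just an illustrative name:

    #include <stddef.h>
    #include "roaring.h"

    /* Chain many unions lazily, then repair the cardinalities once. */
    static roaring_bitmap_t *union_all(roaring_bitmap_t **maps, size_t n) {
        if (n == 0) return roaring_bitmap_create();
        roaring_bitmap_t *acc = roaring_bitmap_copy(maps[0]);
        size_t i;
        for (i = 1; i < n; i++) {
            /* 'true' favours eager conversion to bitset containers,
               which keeps repeated lazy unions cheap */
            roaring_bitmap_lazy_or_inplace(acc, maps[i], true);
        }
        roaring_bitmap_repair_after_lazy(acc);  /* fix the unknown cardinalities */
        return acc;
    }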
@@ -3888,7 +4006,7 @@ static void array_bitset_container_lazy_union(const array_container_t *src_1,
 * otherwise it is an array_container_t. We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-static bool array_array_container_union(
+bool array_array_container_union(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3900,7 +4018,7 @@ static bool array_array_container_union(
* it either written to src_1 (if *dst is null) or to *dst.
* If the result is a bitset_container_t and *dst is null, then there was a failure.
*/
-static bool array_array_container_inplace_union(
+bool array_array_container_inplace_union(
array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3908,7 +4026,7 @@ static bool array_array_container_inplace_union(
* Same as array_array_container_union except that it will more eagerly produce
* a bitset.
*/
-static bool array_array_container_lazy_union(
+bool array_array_container_lazy_union(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3916,7 +4034,7 @@ static bool array_array_container_lazy_union(
* Same as array_array_container_inplace_union except that it will more eagerly produce
* a bitset.
*/
-static bool array_array_container_lazy_inplace_union(
+bool array_array_container_lazy_inplace_union(
array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -3925,7 +4043,7 @@ static bool array_array_container_lazy_inplace_union(
* valid container. The result might need to be further converted to array or
* bitset container,
* the caller is responsible for the eventual conversion. */
-static void array_run_container_union(const array_container_t *src_1,
+void array_run_container_union(const array_container_t *src_1,
const run_container_t *src_2,
run_container_t *dst);
@@ -3933,7 +4051,7 @@ static void array_run_container_union(const array_container_t *src_1,
* src2. The result might need to be further converted to array or
* bitset container,
* the caller is responsible for the eventual conversion. */
-static void array_run_container_inplace_union(const array_container_t *src_1,
+void array_run_container_inplace_union(const array_container_t *src_1,
run_container_t *src_2);
/* Compute the union of src_1 and src_2 and write the result to
@@ -3941,7 +4059,7 @@ static void array_run_container_inplace_union(const array_container_t *src_1,
* If run_container_is_full(src_1) is true, you must not be calling this
*function.
**/
-static void run_bitset_container_union(const run_container_t *src_1,
+void run_bitset_container_union(const run_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
@@ -3951,7 +4069,7 @@ static void run_bitset_container_union(const run_container_t *src_1,
* If run_container_is_full(src_1) is true, you must not be calling this
* function.
* */
-static void run_bitset_container_lazy_union(const run_container_t *src_1,
+void run_bitset_container_lazy_union(const run_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
@@ -3990,7 +4108,7 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the xor of src_1 and src_2 and write the result to
* dst (which has no container initially).
* Result is true iff dst is a bitset */
-static bool array_bitset_container_xor(
+bool array_bitset_container_xor(
const array_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -3999,7 +4117,7 @@ static bool array_bitset_container_xor(
* update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
*/
-static void array_bitset_container_lazy_xor(const array_container_t *src_1,
+void array_bitset_container_lazy_xor(const array_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
/* Compute the xor of src_1 and src_2 and write the result to
@@ -4007,7 +4125,7 @@ static void array_bitset_container_lazy_xor(const array_container_t *src_1,
* "dst is a bitset"
*/
-static bool bitset_bitset_container_xor(
+bool bitset_bitset_container_xor(
const bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -4018,7 +4136,7 @@ static bool bitset_bitset_container_xor(
* result true) or an array container.
*/
-static bool run_bitset_container_xor(
+bool run_bitset_container_xor(
const run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -4027,7 +4145,7 @@ static bool run_bitset_container_xor(
* cardinality would dictate an array container.
*/
-static void run_bitset_container_lazy_xor(const run_container_t *src_1,
+void run_bitset_container_lazy_xor(const run_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
@@ -4035,7 +4153,7 @@ static void run_bitset_container_lazy_xor(const run_container_t *src_1,
* can become any kind of container.
*/
-static int array_run_container_xor(
+int array_run_container_xor(
const array_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -4043,7 +4161,7 @@ static int array_run_container_xor(
* an array or a bitset container, indicated by return code
*/
-static bool array_array_container_xor(
+bool array_array_container_xor(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -4053,7 +4171,7 @@ static bool array_array_container_xor(
* container type might not be correct for the actual cardinality
*/
-static bool array_array_container_lazy_xor(
+bool array_array_container_lazy_xor(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
@@ -4062,7 +4180,7 @@ static bool array_array_container_lazy_xor(
* smaller.
*/
-static void array_run_container_lazy_xor(const array_container_t *src_1,
+void array_run_container_lazy_xor(const array_container_t *src_1,
const run_container_t *src_2,
run_container_t *dst);
@@ -4070,7 +4188,7 @@ static void array_run_container_lazy_xor(const array_container_t *src_1,
* can become any kind of container.
*/
-static int run_run_container_xor(
+int run_run_container_xor(
const run_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -4085,15 +4203,15 @@ static int run_run_container_xor(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_array_container_ixor(
+bool bitset_array_container_ixor(
bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst);
-static bool bitset_bitset_container_ixor(
+bool bitset_bitset_container_ixor(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
-static bool array_bitset_container_ixor(
+bool array_bitset_container_ixor(
array_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
@@ -4104,11 +4222,11 @@ static bool array_bitset_container_ixor(
* result true) or an array container.
*/
-static bool run_bitset_container_ixor(
+bool run_bitset_container_ixor(
run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst);
-static bool bitset_run_container_ixor(
+bool bitset_run_container_ixor(
bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -4116,19 +4234,19 @@ static bool bitset_run_container_ixor(
* can become any kind of container.
*/
-static int array_run_container_ixor(
+int array_run_container_ixor(
array_container_t *src_1, const run_container_t *src_2,
container_t **dst);
-static int run_array_container_ixor(
+int run_array_container_ixor(
run_container_t *src_1, const array_container_t *src_2,
container_t **dst);
-static bool array_array_container_ixor(
+bool array_array_container_ixor(
array_container_t *src_1, const array_container_t *src_2,
container_t **dst);
-static int run_run_container_ixor(
+int run_run_container_ixor(
run_container_t *src_1, const run_container_t *src_2,
container_t **dst);
@@ -4204,18 +4322,18 @@ typedef struct shared_container_s shared_container_t;
* If copy_on_write = false, then clone.
* Return NULL in case of failure.
**/
-static container_t *get_copy_of_container(container_t *container, uint8_t *typecode,
+container_t *get_copy_of_container(container_t *container, uint8_t *typecode,
bool copy_on_write);
/* Frees a shared container (actually decrement its counter and only frees when
* the counter falls to zero). */
-static void shared_container_free(shared_container_t *container);
+void shared_container_free(shared_container_t *container);
/* Extract a copy from the shared container, freeing the shared container if
 * there is just one instance left; clone instances when the counter is
 * higher than one. */
-static container_t *shared_container_extract_copy(shared_container_t *container,
+container_t *shared_container_extract_copy(shared_container_t *container,
uint8_t *typecode);
/* access to container underneath */
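
The shared_container_* helpers declared above implement reference-counted copy-on-write: a container copied in COW mode is shared between bitmaps until one side needs to mutate it, at which point it is unshared (extracted or cloned). A minimal sketch of the observable behaviour through the public API; roaring_bitmap_set_copy_on_write(), roaring_bitmap_copy() and roaring_bitmap_contains() are assumed available as in upstream CRoaring, and cow_sketch() is an illustrative name:

    #include <assert.h>
    #include "roaring.h"

    static void cow_sketch(void) {
        roaring_bitmap_t *a = roaring_bitmap_create();
        roaring_bitmap_set_copy_on_write(a, true);
        roaring_bitmap_add(a, 42);

        /* with COW enabled, the copy shares containers with 'a'
           (reference counted through shared_container_t) */
        roaring_bitmap_t *b = roaring_bitmap_copy(a);

        /* mutating 'b' unshares the container first, so 'a' is untouched */
        roaring_bitmap_add(b, 43);
        assert(roaring_bitmap_contains(b, 43) && !roaring_bitmap_contains(a, 43));

        roaring_bitmap_free(a);
        roaring_bitmap_free(b);
    }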
@@ -4261,7 +4379,7 @@ static inline uint8_t get_container_type(
* is responsible for deallocation. If the container is not shared, then it is
* physically cloned. Sharable containers are not cloneable.
*/
-static container_t *container_clone(const container_t *container, uint8_t typecode);
+container_t *container_clone(const container_t *container, uint8_t typecode);
/* access to container underneath, cloning it if needed */
static inline container_t *get_writable_copy_if_shared(
@@ -4311,7 +4429,7 @@ static inline bitset_container_t *container_to_bitset(
* Get the container name from the typecode
* (unused at time of writing)
*/
-static inline const char *get_container_name(uint8_t typecode) {
+/*static inline const char *get_container_name(uint8_t typecode) {
switch (typecode) {
case BITSET_CONTAINER_TYPE:
return container_names[0];
@@ -4326,7 +4444,7 @@ static inline const char *get_container_name(uint8_t typecode) {
__builtin_unreachable();
return "unknown";
}
-}
+}*/
static inline const char *get_full_container_name(
const container_t *c, uint8_t typecode
@@ -4555,13 +4673,13 @@ static inline int32_t container_size_in_bytes(
/**
* print the container (useful for debugging), requires a typecode
*/
-static void container_printf(const container_t *container, uint8_t typecode);
+void container_printf(const container_t *container, uint8_t typecode);
/**
* print the content of the container as a comma-separated list of 32-bit values
* starting at base, requires a typecode
*/
-static void container_printf_as_uint32_array(const container_t *container,
+void container_printf_as_uint32_array(const container_t *container,
uint8_t typecode, uint32_t base);
/**
@@ -4588,7 +4706,7 @@ static inline bool container_nonzero_cardinality(
/**
* Recover memory from a container, requires a typecode
*/
-static void container_free(container_t *container, uint8_t typecode);
+void container_free(container_t *container, uint8_t typecode);
/**
* Convert a container to an array of values, requires a typecode as well as a
@@ -5308,7 +5426,7 @@ static inline container_t *container_lazy_or(
CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// we are being lazy
- result = convert_run_to_efficient_container(
+ result = convert_run_to_efficient_container_and_free(
CAST_run(result), result_type);
return result;
@@ -5713,6 +5831,43 @@ static inline container_t* container_xor(
}
}
+/* Applies an offset to the non-empty container 'c'.
+ * The results are stored in new containers returned via 'lo' and 'hi', for the
+ * low and high halves of the result (where the low half matches the original key
+ * and the high one corresponds to values for the following key).
+ * Either one of 'lo' and 'hi' are allowed to be 'NULL', but not both.
+ * Whenever one of them is not 'NULL', it should point to a 'NULL' container.
+ * Whenever one of them is 'NULL' the shifted elements for that part will not be
+ * computed.
+ * If either of the resulting containers turns out to be empty, the pointed
+ * container will remain 'NULL'.
+ */
+static inline void container_add_offset(const container_t *c, uint8_t type,
+ container_t **lo, container_t **hi,
+ uint16_t offset) {
+ assert(offset != 0);
+ assert(container_nonzero_cardinality(c, type));
+ assert(lo != NULL || hi != NULL);
+ assert(lo == NULL || *lo == NULL);
+ assert(hi == NULL || *hi == NULL);
+
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ bitset_container_offset(const_CAST_bitset(c), lo, hi, offset);
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ array_container_offset(const_CAST_array(c), lo, hi, offset);
+ break;
+ case RUN_CONTAINER_TYPE:
+ run_container_offset(const_CAST_run(c), lo, hi, offset);
+ break;
+ default:
+ assert(false);
+ __builtin_unreachable();
+ break;
+ }
+}
+
/**
* Compute xor between two containers, generate a new container (having type
* result_type), requires a typecode. This allocates new memory, caller
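
Returning to container_add_offset() defined just above: the lo/hi split is plain 16-bit arithmetic, and each shifted value either stays under the original container key or wraps into the container of the following key. A minimal stand-alone sketch (offset_split_sketch is an illustrative name, not part of the library):

    #include <stdint.h>

    /* Decide where a single container value lands after an offset is applied. */
    static void offset_split_sketch(uint16_t v, uint16_t offset,
                                    int *goes_to_lo, uint16_t *shifted) {
        uint32_t s = (uint32_t)v + (uint32_t)offset;
        *shifted    = (uint16_t)(s & 0xFFFF); /* value inside its destination container */
        *goes_to_lo = (s <= 0xFFFF);          /* still under the original key? */
        /* otherwise it belongs to 'hi', i.e. the container of the next key */
    }

For example, v = 0xFFF0 with offset = 0x0020 gives s = 0x10010, so the value ends up in the 'hi' container as 0x0010.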
@@ -6522,7 +6677,7 @@ static inline container_t *container_remove_range(
if (result_cardinality == 0) {
return NULL;
- } else if (result_cardinality < DEFAULT_MAX_SIZE) {
+ } else if (result_cardinality <= DEFAULT_MAX_SIZE) {
*result_type = ARRAY_CONTAINER_TYPE;
bitset_reset_range(bitset->words, min, max+1);
bitset->cardinality = result_cardinality;
@@ -6561,15 +6716,7 @@ static inline container_t *container_remove_range(
}
run_container_remove_range(run, min, max);
-
- if (run_container_serialized_size_in_bytes(run->n_runs) <=
- bitset_container_serialized_size_in_bytes()) {
- *result_type = RUN_CONTAINER_TYPE;
- return run;
- } else {
- *result_type = BITSET_CONTAINER_TYPE;
- return bitset_container_from_run(run);
- }
+ return convert_run_to_efficient_container(run, result_type);
}
default:
__builtin_unreachable();
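
Two things change in the container_remove_range() hunk above: the comparison against DEFAULT_MAX_SIZE becomes '<=', so a bitset whose remaining cardinality is exactly at the threshold now demotes to an array container, and the run-container branch is funnelled through convert_run_to_efficient_container() instead of an open-coded size check. A minimal sketch of the resulting type decision, assuming DEFAULT_MAX_SIZE is the usual 4096-value array/bitset threshold from upstream CRoaring (kind_after_remove is an illustrative name):

    enum result_kind { RESULT_EMPTY, RESULT_ARRAY, RESULT_BITSET };

    static enum result_kind kind_after_remove(int result_cardinality, int max_size) {
        if (result_cardinality == 0) return RESULT_EMPTY;
        /* '<=' keeps a container holding exactly max_size values as an array */
        if (result_cardinality <= max_size) return RESULT_ARRAY;
        return RESULT_BITSET;
    }

With max_size = 4096, a remaining cardinality of exactly 4096 now yields an array container, where the old '<' comparison produced a bitset.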
@@ -6610,55 +6757,55 @@ enum {
/**
* Create a new roaring array
*/
-static roaring_array_t *ra_create(void);
+roaring_array_t *ra_create(void);
/**
* Initialize an existing roaring array with the specified capacity (in number
* of containers)
*/
-static bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap);
+bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap);
/**
* Initialize with zero capacity
*/
-static void ra_init(roaring_array_t *t);
+void ra_init(roaring_array_t *t);
/**
* Copies this roaring array, we assume that dest is not initialized
*/
-static bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
+bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
bool copy_on_write);
/*
* Shrinks the capacity, returns the number of bytes saved.
*/
-static int ra_shrink_to_fit(roaring_array_t *ra);
+int ra_shrink_to_fit(roaring_array_t *ra);
/**
* Copies this roaring array, we assume that dest is initialized
*/
-static bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
+bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
bool copy_on_write);
/**
* Frees the memory used by a roaring array
*/
-static void ra_clear(roaring_array_t *r);
+void ra_clear(roaring_array_t *r);
/**
* Frees the memory used by a roaring array, but does not free the containers
*/
-static void ra_clear_without_containers(roaring_array_t *r);
+void ra_clear_without_containers(roaring_array_t *r);
/**
* Frees just the containers
*/
-static void ra_clear_containers(roaring_array_t *ra);
+void ra_clear_containers(roaring_array_t *ra);
/**
* Get the index corresponding to a 16-bit key
*/
-static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
+inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1;
return binarySearch(ra->keys, (int32_t)ra->size, x);
}
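
ra_get_index() above resolves the 16-bit key part of a value to a slot in the sorted ra->keys array, with a fast path for the last key that keeps ascending insertions cheap. The whole roaring_array is organised around splitting each 32-bit value into that key and a 16-bit low part stored inside the container; a minimal sketch of the split (rb_key/rb_low are illustrative names):

    #include <stdint.h>

    static inline uint16_t rb_key(uint32_t v) { return (uint16_t)(v >> 16);   } /* selects the container      */
    static inline uint16_t rb_low(uint32_t v) { return (uint16_t)(v & 0xFFFF); } /* value inside the container */

For v = 0x00012345, rb_key(v) = 0x0001 is what ra_get_index() searches for, and rb_low(v) = 0x2345 is what the matching container stores.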
@@ -6666,7 +6813,7 @@ static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
/**
* Retrieves the container at index i, filling in the typecode
*/
-static inline container_t *ra_get_container_at_index(
+inline container_t *ra_get_container_at_index(
const roaring_array_t *ra, uint16_t i, uint8_t *typecode
){
*typecode = ra->typecodes[i];
@@ -6676,19 +6823,21 @@ static inline container_t *ra_get_container_at_index(
/**
* Retrieves the key at index i
*/
-static uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i);
+inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
+ return ra->keys[i];
+}
/**
* Add a new key-value pair at index i
*/
-static void ra_insert_new_key_value_at(
+void ra_insert_new_key_value_at(
roaring_array_t *ra, int32_t i, uint16_t key,
container_t *c, uint8_t typecode);
/**
* Append a new key-value pair
*/
-static void ra_append(
+void ra_append(
roaring_array_t *ra, uint16_t key,
container_t *c, uint8_t typecode);
@@ -6696,7 +6845,7 @@ static void ra_append(
* Append a new key-value pair to ra, cloning (in COW sense) a value from sa
* at index index
*/
-static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
+void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t index, bool copy_on_write);
/**
@@ -6704,21 +6853,21 @@ static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
* at indexes
* [start_index, end_index)
*/
-static void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
+void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
int32_t start_index, int32_t end_index,
bool copy_on_write);
/** appends from sa to ra, ending with the greatest key that is
 * less than or equal to stopping_key
*/
-static void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
+void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t stopping_key, bool copy_on_write);
/** appends from sa to ra, starting with the smallest key that is
 * strictly greater than before_start
*/
-static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
+void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t before_start, bool copy_on_write);
/**
@@ -6726,13 +6875,13 @@ static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *s
* [start_index, end_index), old array should not be freed
* (use ra_clear_without_containers)
**/
-static void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
+void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
int32_t start_index, int32_t end_index);
/**
* Append new key-value pairs to ra, from sa at indexes
* [start_index, end_index)
*/
-static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
+void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
int32_t start_index, int32_t end_index,
bool copy_on_write);
@@ -6740,7 +6889,7 @@ static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
* Set the container at the corresponding index using the specified
* typecode.
*/
-static inline void ra_set_container_at_index(
+inline void ra_set_container_at_index(
const roaring_array_t *ra, int32_t i,
container_t *c, uint8_t typecode
){
@@ -6754,20 +6903,20 @@ static inline void ra_set_container_at_index(
* (at
* least);
*/
-static bool extend_array(roaring_array_t *ra, int32_t k);
+bool extend_array(roaring_array_t *ra, int32_t k);
-static inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; }
+inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; }
static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
int32_t pos) {
return advanceUntil(ra->keys, pos, ra->size, x);
}
-static int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);
+int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);
-static void ra_downsize(roaring_array_t *ra, int32_t new_length);
+void ra_downsize(roaring_array_t *ra, int32_t new_length);
-static inline void ra_replace_key_and_container_at_index(
+inline void ra_replace_key_and_container_at_index(
roaring_array_t *ra, int32_t i, uint16_t key,
container_t *c, uint8_t typecode
){
@@ -6779,9 +6928,9 @@ static inline void ra_replace_key_and_container_at_index(
}
// write set bits to an array
-static void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
+void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
-static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans);
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans);
/**
* write a bitmap to a buffer. This is meant to be compatible with
@@ -6789,7 +6938,7 @@ static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size
* Java and Go versions. Return the size in bytes of the serialized
* output (which should be ra_portable_size_in_bytes(ra)).
*/
-static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
/**
* read a bitmap from a serialized version. This is meant to be compatible
@@ -6799,7 +6948,7 @@ static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
* and *readbytes indicates how many bytes were read. In all cases, if the function
* returns true, then maxbytes >= *readbytes.
*/
-static bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes);
+bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes);
/**
* Quickly checks whether there is a serialized bitmap at the pointer,
@@ -6809,25 +6958,25 @@ static bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const
* This function returns 0 if and only if no valid bitmap is found.
* Otherwise, it returns how many bytes are occupied by the bitmap data.
*/
-static size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes);
+size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes);
/**
* How many bytes are required to serialize this bitmap (meant to be
* compatible
* with Java and Go versions)
*/
-static size_t ra_portable_size_in_bytes(const roaring_array_t *ra);
+size_t ra_portable_size_in_bytes(const roaring_array_t *ra);
/**
* return true if it contains at least one run container.
*/
-static bool ra_has_run_container(const roaring_array_t *ra);
+bool ra_has_run_container(const roaring_array_t *ra);
/**
* Size of the header when serializing (meant to be compatible
* with Java and Go versions)
*/
-static uint32_t ra_portable_header_size(const roaring_array_t *ra);
+uint32_t ra_portable_header_size(const roaring_array_t *ra);
/**
 * If the container at index i is shared, unshare it (creating a local
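
The ra_portable_* declarations above implement the portable serialization format shared with the Java and Go implementations. A minimal round-trip sketch through the public wrappers; roaring_bitmap_portable_size_in_bytes(), roaring_bitmap_portable_serialize() and roaring_bitmap_portable_deserialize_safe() are assumed to be exposed by this bundled roaring.h as in upstream CRoaring, and round_trip() is an illustrative name:

    #include <stdlib.h>
    #include "roaring.h"

    static roaring_bitmap_t *round_trip(const roaring_bitmap_t *rb) {
        size_t len = roaring_bitmap_portable_size_in_bytes(rb);
        char *buf = (char *)malloc(len);
        if (buf == NULL) return NULL;
        size_t written = roaring_bitmap_portable_serialize(rb, buf);
        /* the _safe variant never reads past 'written' bytes */
        roaring_bitmap_t *copy = roaring_bitmap_portable_deserialize_safe(buf, written);
        free(buf);
        return copy;  /* NULL if the buffer did not hold a valid bitmap */
    }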
@@ -6843,18 +6992,18 @@ static inline void ra_unshare_container_at_index(roaring_array_t *ra,
/**
* remove at index i, sliding over all entries after i
*/
-static void ra_remove_at_index(roaring_array_t *ra, int32_t i);
+void ra_remove_at_index(roaring_array_t *ra, int32_t i);
/**
* clears all containers, sets the size at 0 and shrinks the memory usage.
*/
-static void ra_reset(roaring_array_t *ra);
+void ra_reset(roaring_array_t *ra);
/**
* remove at index i, sliding over all entries after i. Free removed container.
*/
-static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i);
+void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i);
/**
* remove a chunk of indices, sliding over entries after it
@@ -6865,7 +7014,7 @@ static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i);
// the mutated RoaringBitmap that are after the largest container of
// the argument RoaringBitmap. It is followed by a call to resize.
//
-static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
+void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
uint32_t new_begin);
/**
@@ -6875,7 +7024,7 @@ static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
* This function doesn't free or create new containers.
* Caller is responsible for that.
*/
-static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
+void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
#ifdef __cplusplus
} // namespace internal
@@ -6884,4378 +7033,6 @@ static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
#endif
/* end file include/roaring/roaring_array.h */
-/* begin file include/roaring/misc/configreport.h */
-/*
- * configreport.h
- *
- */
-
-#ifndef INCLUDE_MISC_CONFIGREPORT_H_
-#define INCLUDE_MISC_CONFIGREPORT_H_
-
-#include <stddef.h> // for size_t
-#include <stdint.h>
-#include <stdio.h>
-
-
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace misc {
-#endif
-
-#ifdef CROARING_IS_X64
-// useful for basic info (0)
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx) {
-#ifdef ROARING_INLINE_ASM
- __asm volatile("cpuid"
- : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
- : "0"(*eax), "2"(*ecx));
-#endif /* not sure what to do when static inline assembly is unavailable*/
-}
-
-// CPUID instruction takes no parameters as CPUID implicitly uses the EAX
-// register.
-// The EAX register should be loaded with a value specifying what information to
-// return
-static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) {
-#ifdef ROARING_INLINE_ASM
- __asm__ volatile("cpuid;" // call cpuid instruction
- : "=a"(*eax), "=b"(*ebx), "=c"(*ecx),
- "=d"(*edx) // output equal to "movl %%eax %1"
- : "a"(code) // input equal to "movl %1, %%eax"
- //:"%eax","%ebx","%ecx","%edx"// clobbered register
- );
-#endif /* not sure what to do when static inline assembly is unavailable*/
-}
-
-static inline int computecacheline() {
- int eax = 0, ebx = 0, ecx = 0, edx = 0;
- cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx);
- return ecx & 0xFF;
-}
-
-// this is quite imperfect, but can be handy
-static inline const char *guessprocessor() {
- unsigned eax = 1, ebx = 0, ecx = 0, edx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- const char *codename;
- switch (eax >> 4) {
- case 0x506E:
- codename = "Skylake";
- break;
- case 0x406C:
- codename = "CherryTrail";
- break;
- case 0x306D:
- codename = "Broadwell";
- break;
- case 0x306C:
- codename = "Haswell";
- break;
- case 0x306A:
- codename = "IvyBridge";
- break;
- case 0x206A:
- case 0x206D:
- codename = "SandyBridge";
- break;
- case 0x2065:
- case 0x206C:
- case 0x206F:
- codename = "Westmere";
- break;
- case 0x106E:
- case 0x106A:
- case 0x206E:
- codename = "Nehalem";
- break;
- case 0x1067:
- case 0x106D:
- codename = "Penryn";
- break;
- case 0x006F:
- case 0x1066:
- codename = "Merom";
- break;
- case 0x0066:
- codename = "Presler";
- break;
- case 0x0063:
- case 0x0064:
- codename = "Prescott";
- break;
- case 0x006D:
- codename = "Dothan";
- break;
- case 0x0366:
- codename = "Cedarview";
- break;
- case 0x0266:
- codename = "Lincroft";
- break;
- case 0x016C:
- codename = "Pineview";
- break;
- default:
- codename = "UNKNOWN";
- break;
- }
- return codename;
-}
-
-static inline void tellmeall() {
- printf("x64 processor: %s\t", guessprocessor());
-
-#ifdef __VERSION__
- printf(" compiler version: %s\t", __VERSION__);
-#endif
- uint32_t config = croaring_detect_supported_architectures();
- if((config & CROARING_NEON) == CROARING_NEON) {
- printf(" NEON detected\t");
- }
- #ifdef __AVX2__
- printf(" Building for AVX2\t");
- #endif
- if(croaring_avx2()) {
- printf( "AVX2 usable\t");
- }
- if((config & CROARING_AVX2) == CROARING_AVX2) {
- printf( "AVX2 detected\t");
- if(!croaring_avx2()) {
- printf( "AVX2 not used\t");
- }
- }
- if((config & CROARING_SSE42) == CROARING_SSE42) {
- printf(" SSE4.2 detected\t");
- }
- if((config & CROARING_BMI1) == CROARING_BMI1) {
- printf(" BMI1 detected\t");
- }
- if((config & CROARING_BMI2) == CROARING_BMI2) {
- printf(" BMI2 detected\t");
- }
- printf("\n");
- if ((sizeof(int) != 4) || (sizeof(long) != 8)) {
- printf("number of bytes: int = %lu long = %lu \n",
- (long unsigned int)sizeof(size_t),
- (long unsigned int)sizeof(int));
- }
-#ifdef __LITTLE_ENDIAN__
-// This is what we expect!
-// printf("you have little endian machine");
-#endif
-#ifdef __BIG_ENDIAN__
- printf("you have a big endian machine");
-#endif
-#ifdef __CHAR_BIT__
- if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???");
-#endif
- if (computecacheline() != 64)
- printf("cache line: %d bytes\n", computecacheline());
-}
-#else
-
-static inline void tellmeall() {
- printf("Non-X64 processor\n");
-#ifdef __arm__
- printf("ARM processor detected\n");
-#endif
-#ifdef __VERSION__
- printf(" compiler version: %s\t", __VERSION__);
-#endif
- uint32_t config = croaring_detect_supported_architectures();
- if((config & CROARING_NEON) == CROARING_NEON) {
- printf(" NEON detected\t");
- }
- if((config & CROARING_ALTIVEC) == CROARING_ALTIVEC) {
- printf("Altivec detected\n");
- }
-
- if ((sizeof(int) != 4) || (sizeof(long) != 8)) {
- printf("number of bytes: int = %lu long = %lu \n",
- (long unsigned int)sizeof(size_t),
- (long unsigned int)sizeof(int));
- }
-#ifdef __LITTLE_ENDIAN__
-// This is what we expect!
-// printf("you have little endian machine");
-#endif
-#ifdef __BIG_ENDIAN__
- printf("you have a big endian machine");
-#endif
-#ifdef __CHAR_BIT__
- if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???");
-#endif
-}
-
-#endif
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace misc {
-#endif
-
-#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */
-/* end file include/roaring/misc/configreport.h */
-/* begin file src/roaring_array.c */
-#include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-
-
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
-#endif
-
-// Convention: [0,ra->size) all elements are initialized
-// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
-
-extern inline int32_t ra_get_size(const roaring_array_t *ra);
-extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
-
-extern inline container_t *ra_get_container_at_index(
- const roaring_array_t *ra, uint16_t i,
- uint8_t *typecode);
-
-extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
- uint16_t i);
-
-extern inline void ra_replace_key_and_container_at_index(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode);
-
-extern inline void ra_set_container_at_index(
- const roaring_array_t *ra, int32_t i,
- container_t *c, uint8_t typecode);
-
-static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
- //
- // Note: not implemented using C's realloc(), because the memory layout is
- // Struct-of-Arrays vs. Array-of-Structs:
- // https://github.com/RoaringBitmap/CRoaring/issues/256
-
- if ( new_capacity == 0 ) {
- ndpi_free(ra->containers);
- ra->containers = NULL;
- ra->keys = NULL;
- ra->typecodes = NULL;
- ra->allocation_size = 0;
- return true;
- }
- const size_t memoryneeded = new_capacity * (
- sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
- void *bigalloc = ndpi_malloc(memoryneeded);
- if (!bigalloc) return false;
- void *oldbigalloc = ra->containers;
- container_t **newcontainers = (container_t **)bigalloc;
- uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
- uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
- assert((char *)(newtypecodes + new_capacity) ==
- (char *)bigalloc + memoryneeded);
- if(ra->size > 0) {
- memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size);
- memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
- memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
- }
- ra->containers = newcontainers;
- ra->keys = newkeys;
- ra->typecodes = newtypecodes;
- ra->allocation_size = new_capacity;
- ndpi_free(oldbigalloc);
- return true;
-}
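
realloc_array() above keeps the roaring_array as a struct-of-arrays: one allocation holds the container pointers, then the 16-bit keys, then the 1-byte typecodes, which is why it cannot simply call realloc(). A minimal stand-alone sketch of that layout, using plain malloc() where the bundled code goes through nDPI's ndpi_malloc() (struct ra_layout and ra_layout_alloc are illustrative names):

    #include <stdint.h>
    #include <stdlib.h>

    struct ra_layout {
        void    **containers;
        uint16_t *keys;
        uint8_t  *typecodes;
    };

    static int ra_layout_alloc(struct ra_layout *ra, int32_t cap) {
        size_t bytes = (size_t)cap *
                       (sizeof(void *) + sizeof(uint16_t) + sizeof(uint8_t));
        void *block = malloc(bytes);
        if (block == NULL) return 0;
        ra->containers = (void **)block;                      /* pointers first */
        ra->keys       = (uint16_t *)(ra->containers + cap);  /* then keys      */
        ra->typecodes  = (uint8_t *)(ra->keys + cap);         /* then typecodes */
        return 1;
    }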
-
-static bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
- if (!new_ra) return false;
- ra_init(new_ra);
-
- if (cap > INT32_MAX) { return false; }
-
- if(cap > 0) {
- void *bigalloc = ndpi_malloc(cap *
- (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
- if( bigalloc == NULL ) return false;
- new_ra->containers = (container_t **)bigalloc;
- new_ra->keys = (uint16_t *)(new_ra->containers + cap);
- new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
- // Narrowing is safe because of above check
- new_ra->allocation_size = (int32_t)cap;
- }
- return true;
-}
-
-static int ra_shrink_to_fit(roaring_array_t *ra) {
- int savings = (ra->allocation_size - ra->size) *
- (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
- if (!realloc_array(ra, ra->size)) {
- return 0;
- }
- ra->allocation_size = ra->size;
- return savings;
-}
-
-static void ra_init(roaring_array_t *new_ra) {
- if (!new_ra) { return; }
- new_ra->keys = NULL;
- new_ra->containers = NULL;
- new_ra->typecodes = NULL;
-
- new_ra->allocation_size = 0;
- new_ra->size = 0;
- new_ra->flags = 0;
-}
-
-static bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
- bool copy_on_write) {
- ra_clear_containers(dest); // we are going to overwrite them
- if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size
- dest->size = 0; // <--- This is important.
- return true; // output was just cleared, so they match
- }
- if (dest->allocation_size < source->size) {
- if (!realloc_array(dest, source->size)) {
- return false;
- }
- }
- dest->size = source->size;
- memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
- // we go through the containers, turning them into shared containers...
- if (copy_on_write) {
- int32_t i; for (i = 0; i < dest->size; ++i) {
- source->containers[i] = get_copy_of_container(
- source->containers[i], &source->typecodes[i], copy_on_write);
- }
- // we do a shallow copy to the other bitmap
- memcpy(dest->containers, source->containers,
- dest->size * sizeof(container_t *));
- memcpy(dest->typecodes, source->typecodes,
- dest->size * sizeof(uint8_t));
- } else {
- memcpy(dest->typecodes, source->typecodes,
- dest->size * sizeof(uint8_t));
- int32_t i; for (i = 0; i < dest->size; i++) {
- dest->containers[i] =
- container_clone(source->containers[i], source->typecodes[i]);
- if (dest->containers[i] == NULL) {
- int32_t j; for (j = 0; j < i; j++) {
- container_free(dest->containers[j], dest->typecodes[j]);
- }
- ra_clear_without_containers(dest);
- return false;
- }
- }
- }
- return true;
-}
-
-static void ra_clear_containers(roaring_array_t *ra) {
- int32_t i; for (i = 0; i < ra->size; ++i) {
- container_free(ra->containers[i], ra->typecodes[i]);
- }
-}
-
-static void ra_reset(roaring_array_t *ra) {
- ra_clear_containers(ra);
- ra->size = 0;
- ra_shrink_to_fit(ra);
-}
-
-static void ra_clear_without_containers(roaring_array_t *ra) {
- ndpi_free(ra->containers); // keys and typecodes are allocated with containers
- ra->size = 0;
- ra->allocation_size = 0;
- ra->containers = NULL;
- ra->keys = NULL;
- ra->typecodes = NULL;
-}
-
-static void ra_clear(roaring_array_t *ra) {
- ra_clear_containers(ra);
- ra_clear_without_containers(ra);
-}
-
-static bool extend_array(roaring_array_t *ra, int32_t k) {
- int32_t desired_size = ra->size + k;
- assert(desired_size <= MAX_CONTAINERS);
- if (desired_size > ra->allocation_size) {
- int32_t new_capacity =
- (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
- if (new_capacity > MAX_CONTAINERS) {
- new_capacity = MAX_CONTAINERS;
- }
-
- return realloc_array(ra, new_capacity);
- }
- return true;
-}
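-
-// Growth policy of extend_array() above: the new capacity is twice the
-// desired size while the array holds fewer than 1024 containers, and 5/4 of
-// the desired size afterwards, always capped at MAX_CONTAINERS (one
-// container per possible 16-bit key).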
-
-static void ra_append(
- roaring_array_t *ra, uint16_t key,
- container_t *c, uint8_t typecode
-){
- extend_array(ra, 1);
- const int32_t pos = ra->size;
-
- ra->keys[pos] = key;
- ra->containers[pos] = c;
- ra->typecodes[pos] = typecode;
- ra->size++;
-}
-
-static void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
- uint16_t index, bool copy_on_write) {
- extend_array(ra, 1);
- const int32_t pos = ra->size;
-
-    // old contents are junk and do not need freeing
- ra->keys[pos] = sa->keys[index];
- // the shared container will be in two bitmaps
- if (copy_on_write) {
- sa->containers[index] = get_copy_of_container(
- sa->containers[index], &sa->typecodes[index], copy_on_write);
- ra->containers[pos] = sa->containers[index];
- ra->typecodes[pos] = sa->typecodes[index];
- } else {
- ra->containers[pos] =
- container_clone(sa->containers[index], sa->typecodes[index]);
- ra->typecodes[pos] = sa->typecodes[index];
- }
- ra->size++;
-}
-
-static void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
- uint16_t stopping_key, bool copy_on_write) {
- int32_t i; for (i = 0; i < sa->size; ++i) {
- if (sa->keys[i] >= stopping_key) break;
- ra_append_copy(ra, sa, i, copy_on_write);
- }
-}
-
-static void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
- int32_t start_index, int32_t end_index,
- bool copy_on_write) {
- extend_array(ra, end_index - start_index);
- int32_t i; for (i = start_index; i < end_index; ++i) {
- const int32_t pos = ra->size;
- ra->keys[pos] = sa->keys[i];
- if (copy_on_write) {
- sa->containers[i] = get_copy_of_container(
- sa->containers[i], &sa->typecodes[i], copy_on_write);
- ra->containers[pos] = sa->containers[i];
- ra->typecodes[pos] = sa->typecodes[i];
- } else {
- ra->containers[pos] =
- container_clone(sa->containers[i], sa->typecodes[i]);
- ra->typecodes[pos] = sa->typecodes[i];
- }
- ra->size++;
- }
-}
-
-static void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
- uint16_t before_start, bool copy_on_write) {
- int start_location = ra_get_index(sa, before_start);
- if (start_location >= 0)
- ++start_location;
- else
- start_location = -start_location - 1;
- ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write);
-}
-
-static void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
- int32_t start_index, int32_t end_index) {
- extend_array(ra, end_index - start_index);
-
- int32_t i; for (i = start_index; i < end_index; ++i) {
- const int32_t pos = ra->size;
-
- ra->keys[pos] = sa->keys[i];
- ra->containers[pos] = sa->containers[i];
- ra->typecodes[pos] = sa->typecodes[i];
- ra->size++;
- }
-}
-
-static void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
- int32_t start_index, int32_t end_index,
- bool copy_on_write) {
- extend_array(ra, end_index - start_index);
-
- int32_t i; for (i = start_index; i < end_index; ++i) {
- const int32_t pos = ra->size;
- ra->keys[pos] = sa->keys[i];
- if (copy_on_write) {
- sa->containers[i] = get_copy_of_container(
- sa->containers[i], &sa->typecodes[i], copy_on_write);
- ra->containers[pos] = sa->containers[i];
- ra->typecodes[pos] = sa->typecodes[i];
- } else {
- ra->containers[pos] =
- container_clone(sa->containers[i], sa->typecodes[i]);
- ra->typecodes[pos] = sa->typecodes[i];
- }
- ra->size++;
- }
-}
-
-static container_t *ra_get_container(
- roaring_array_t *ra, uint16_t x, uint8_t *typecode
-){
- int i = binarySearch(ra->keys, (int32_t)ra->size, x);
- if (i < 0) return NULL;
- *typecode = ra->typecodes[i];
- return ra->containers[i];
-}
-
-extern inline container_t *ra_get_container_at_index(
- const roaring_array_t *ra, uint16_t i,
- uint8_t *typecode);
-
-#ifdef ROARING_NOT_USED
-static container_t *ra_get_writable_container(
- roaring_array_t *ra, uint16_t x,
- uint8_t *typecode
-){
- int i = binarySearch(ra->keys, (int32_t)ra->size, x);
- if (i < 0) return NULL;
- *typecode = ra->typecodes[i];
- return get_writable_copy_if_shared(ra->containers[i], typecode);
-}
-
-static container_t *ra_get_writable_container_at_index(
- roaring_array_t *ra, uint16_t i,
- uint8_t *typecode
-){
- assert(i < ra->size);
- *typecode = ra->typecodes[i];
- return get_writable_copy_if_shared(ra->containers[i], typecode);
-}
-#endif
-
-static uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
- return ra->keys[i];
-}
-
-extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
-
-extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
- int32_t pos);
-
-// everything skipped over is freed
-static int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
- while (pos < ra->size && ra->keys[pos] < x) {
- container_free(ra->containers[pos], ra->typecodes[pos]);
- ++pos;
- }
- return pos;
-}
-
-static void ra_insert_new_key_value_at(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode
-){
- extend_array(ra, 1);
- // May be an optimization opportunity with DIY memmove
- memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
- sizeof(uint16_t) * (ra->size - i));
- memmove(&(ra->containers[i + 1]), &(ra->containers[i]),
- sizeof(container_t *) * (ra->size - i));
- memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]),
- sizeof(uint8_t) * (ra->size - i));
- ra->keys[i] = key;
- ra->containers[i] = c;
- ra->typecodes[i] = typecode;
- ra->size++;
-}
-
-// Note: the Java routine sets things to 0, enabling GC.
-// Java called it "resize", but it was always used to downsize.
-// Allowing upsize would break the conventions about
-// valid containers below ra->size.
-
-static void ra_downsize(roaring_array_t *ra, int32_t new_length) {
- assert(new_length <= ra->size);
- ra->size = new_length;
-}
-
-static void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
- memmove(&(ra->containers[i]), &(ra->containers[i + 1]),
- sizeof(container_t *) * (ra->size - i - 1));
- memmove(&(ra->keys[i]), &(ra->keys[i + 1]),
- sizeof(uint16_t) * (ra->size - i - 1));
- memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]),
- sizeof(uint8_t) * (ra->size - i - 1));
- ra->size--;
-}
-
-static void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
- container_free(ra->containers[i], ra->typecodes[i]);
- ra_remove_at_index(ra, i);
-}
-
-// used in inplace andNot only, to slide left the containers from
-// the mutated RoaringBitmap that are after the largest container of
-// the argument RoaringBitmap. In use it should be followed by a call to
-// downsize.
-//
-static void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
- uint32_t new_begin) {
- assert(begin <= end);
- assert(new_begin < begin);
-
- const int range = end - begin;
-
- // We ensure to previously have freed overwritten containers
- // that are not copied elsewhere
-
- memmove(&(ra->containers[new_begin]), &(ra->containers[begin]),
- sizeof(container_t *) * range);
- memmove(&(ra->keys[new_begin]), &(ra->keys[begin]),
- sizeof(uint16_t) * range);
- memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]),
- sizeof(uint8_t) * range);
-}
-
-static void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
- if (distance > 0) {
- extend_array(ra, distance);
- }
- int32_t srcpos = ra->size - count;
- int32_t dstpos = srcpos + distance;
- memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]),
- sizeof(uint16_t) * count);
- memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
- sizeof(container_t *) * count);
- memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
- sizeof(uint8_t) * count);
- ra->size += distance;
-}
-
-
-static void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
- size_t ctr = 0;
- int32_t i; for (i = 0; i < ra->size; ++i) {
- int num_added = container_to_uint32_array(
- ans + ctr, ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
- ctr += num_added;
- }
-}
-
-static bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
- size_t ctr = 0;
- size_t dtr = 0;
-
- size_t t_limit = 0;
-
- bool first = false;
- size_t first_skip = 0;
-
- uint32_t *t_ans = NULL;
- size_t cur_len = 0;
-
- int i = 0; for (i = 0; i < ra->size; ++i) {
-
- const container_t *c = container_unwrap_shared(
- ra->containers[i], &ra->typecodes[i]);
- switch (ra->typecodes[i]) {
- case BITSET_CONTAINER_TYPE:
- t_limit = (const_CAST_bitset(c))->cardinality;
- break;
- case ARRAY_CONTAINER_TYPE:
- t_limit = (const_CAST_array(c))->cardinality;
- break;
- case RUN_CONTAINER_TYPE:
- t_limit = run_container_cardinality(const_CAST_run(c));
- break;
- }
- if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
- if (!first){
- //first_skip = t_limit - (ctr + t_limit - offset);
- first_skip = offset - ctr;
- first = true;
- t_ans = (uint32_t *)ndpi_malloc(sizeof(*t_ans) * (first_skip + limit));
- if(t_ans == NULL) {
- return false;
- }
- memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ;
- cur_len = first_skip + limit;
- }
- if (dtr + t_limit > cur_len){
- uint32_t * append_ans = (uint32_t *)ndpi_malloc(sizeof(*append_ans) * (cur_len + t_limit));
- if(append_ans == NULL) {
- if(t_ans != NULL) ndpi_free(t_ans);
- return false;
- }
- memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
- cur_len = cur_len + t_limit;
- memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
- ndpi_free(t_ans);
- t_ans = append_ans;
- }
- switch (ra->typecodes[i]) {
- case BITSET_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_bitset(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
- break;
- case ARRAY_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_array(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
- break;
- case RUN_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_run(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
- break;
- }
- dtr += t_limit;
- }
- ctr += t_limit;
- if (dtr-first_skip >= limit) break;
- }
- if(t_ans != NULL) {
- memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
- ndpi_free(t_ans);
- }
- return true;
-}
-
-static bool ra_has_run_container(const roaring_array_t *ra) {
- int32_t k; for (k = 0; k < ra->size; ++k) {
- if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
- RUN_CONTAINER_TYPE)
- return true;
- }
- return false;
-}
-
-static uint32_t ra_portable_header_size(const roaring_array_t *ra) {
- if (ra_has_run_container(ra)) {
- if (ra->size <
- NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets
- return 4 + (ra->size + 7) / 8 + 4 * ra->size;
- }
-        return 4 + (ra->size + 7) / 8 +
-               8 * ra->size;  // only 4 fixed bytes (not 4 + 4): the size is packed into the cookie
- } else {
- return 4 + 4 + 8 * ra->size;
- }
-}
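-
-// Worked example for ra_portable_header_size() (assuming NO_OFFSET_THRESHOLD
-// is 4, as in upstream CRoaring): a bitmap with 3 containers, at least one of
-// them a run container, needs 4 (cookie, with the size packed in) +
-// 1 ((3+7)/8 bytes of run bitmap) + 12 (3 key/cardinality pairs, 4 bytes
-// each) = 17 header bytes; with no run container the header is
-// 4 + 4 + 8*3 = 32 bytes (cookie, size, key/cardinality pairs plus 4-byte
-// offsets).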
-
-static size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
- size_t count = ra_portable_header_size(ra);
-
- int32_t k; for (k = 0; k < ra->size; ++k) {
- count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
- }
- return count;
-}
-
-static size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
- char *initbuf = buf;
- uint32_t startOffset = 0;
- bool hasrun = ra_has_run_container(ra);
- if (hasrun) {
- uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
- memcpy(buf, &cookie, sizeof(cookie));
- buf += sizeof(cookie);
- uint32_t s = (ra->size + 7) / 8;
- uint8_t *bitmapOfRunContainers = (uint8_t *)ndpi_calloc(s, 1);
- assert(bitmapOfRunContainers != NULL); // todo: handle
- int32_t i; for (i = 0; i < ra->size; ++i) {
- if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
- RUN_CONTAINER_TYPE) {
- bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
- }
- }
- memcpy(buf, bitmapOfRunContainers, s);
- buf += s;
- ndpi_free(bitmapOfRunContainers);
- if (ra->size < NO_OFFSET_THRESHOLD) {
- startOffset = 4 + 4 * ra->size + s;
- } else {
- startOffset = 4 + 8 * ra->size + s;
- }
- } else { // backwards compatibility
- uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
-
- memcpy(buf, &cookie, sizeof(cookie));
- buf += sizeof(cookie);
- memcpy(buf, &ra->size, sizeof(ra->size));
- buf += sizeof(ra->size);
-
- startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
- }
- int32_t k; for (k = 0; k < ra->size; ++k) {
- memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
- buf += sizeof(ra->keys[k]);
- // get_cardinality returns a value in [1,1<<16], subtracting one
- // we get [0,1<<16 - 1] which fits in 16 bits
- uint16_t card = (uint16_t)(
- container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
- memcpy(buf, &card, sizeof(card));
- buf += sizeof(card);
- }
- if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
- // writing the containers offsets
- int32_t k; for (k = 0; k < ra->size; k++) {
- memcpy(buf, &startOffset, sizeof(startOffset));
- buf += sizeof(startOffset);
- startOffset =
- startOffset +
- container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
- }
- }
- for (k = 0; k < ra->size; ++k) {
- buf += container_write(ra->containers[k], ra->typecodes[k], buf);
- }
- return buf - initbuf;
-}
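-
-// Layout written by ra_portable_serialize() above (and read back by the
-// deserialization routines below):
-//
-//   with run containers:    [32-bit cookie: SERIAL_COOKIE | (size-1)<<16]
-//                           [(size+7)/8 bytes: bitmap of run containers]
-//   without run containers: [32-bit cookie: SERIAL_COOKIE_NO_RUNCONTAINER]
-//                           [32-bit container count]
-//   then, in both cases:    [size x (16-bit key, 16-bit cardinality-1)]
-//                           [size x 32-bit offsets, unless run containers
-//                            are present and size < NO_OFFSET_THRESHOLD]
-//                           [container payloads, in key order]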
-
-// Quickly checks whether there is a serialized bitmap at the pointer,
-// not exceeding size "maxbytes" in bytes. This function does not allocate
-// memory dynamically.
-//
-// This function returns 0 if and only if no valid bitmap is found.
-// Otherwise, it returns how many bytes are occupied.
-//
-static size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
- size_t bytestotal = sizeof(int32_t);// for cookie
- if(bytestotal > maxbytes) return 0;
- uint32_t cookie;
- memcpy(&cookie, buf, sizeof(int32_t));
- buf += sizeof(uint32_t);
- if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
- cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
- return 0;
- }
- int32_t size;
-
- if ((cookie & 0xFFFF) == SERIAL_COOKIE)
- size = (cookie >> 16) + 1;
- else {
- bytestotal += sizeof(int32_t);
- if(bytestotal > maxbytes) return 0;
- memcpy(&size, buf, sizeof(int32_t));
- buf += sizeof(uint32_t);
- }
- if (size > (1<<16)) {
- return 0; // logically impossible
- }
- char *bitmapOfRunContainers = NULL;
- bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
- if (hasrun) {
- int32_t s = (size + 7) / 8;
- bytestotal += s;
- if(bytestotal > maxbytes) return 0;
- bitmapOfRunContainers = (char *)buf;
- buf += s;
- }
- bytestotal += size * 2 * sizeof(uint16_t);
- if(bytestotal > maxbytes) return 0;
- uint16_t *keyscards = (uint16_t *)buf;
- buf += size * 2 * sizeof(uint16_t);
- if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
- // skipping the offsets
- bytestotal += size * 4;
- if(bytestotal > maxbytes) return 0;
- buf += size * 4;
- }
- // Reading the containers
- int32_t k; for (k = 0; k < size; ++k) {
- uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
- uint32_t thiscard = tmp + 1;
- bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
- bool isrun = false;
- if(hasrun) {
- if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
- }
- if (isbitmap) {
- size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
- bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
- buf += containersize;
- } else if (isrun) {
- bytestotal += sizeof(uint16_t);
- if(bytestotal > maxbytes) return 0;
- uint16_t n_runs;
- memcpy(&n_runs, buf, sizeof(uint16_t));
- buf += sizeof(uint16_t);
- size_t containersize = n_runs * sizeof(rle16_t);
- bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
- buf += containersize;
- } else {
- size_t containersize = thiscard * sizeof(uint16_t);
- bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
- buf += containersize;
- }
- }
- return bytestotal;
-}
-
-
-// This function populates answer from the content of buf (reading up to maxbytes bytes).
-// It returns false if a properly serialized bitmap cannot be found.
-// If it returns true, *readbytes is set to the number of bytes read, with *readbytes <= maxbytes.
-static bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
- *readbytes = sizeof(int32_t);// for cookie
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
- return false;
- }
- uint32_t cookie;
- memcpy(&cookie, buf, sizeof(int32_t));
- buf += sizeof(uint32_t);
- if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
- cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
- fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
- cookie);
- return false;
- }
- int32_t size;
-
- if ((cookie & 0xFFFF) == SERIAL_COOKIE)
- size = (cookie >> 16) + 1;
- else {
- *readbytes += sizeof(int32_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
- return false;
- }
- memcpy(&size, buf, sizeof(int32_t));
- buf += sizeof(uint32_t);
- }
- if (size > (1<<16)) {
- fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
- size);
- return false; // logically impossible
- }
- const char *bitmapOfRunContainers = NULL;
- bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
- if (hasrun) {
- int32_t s = (size + 7) / 8;
- *readbytes += s;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
- return false;
- }
- bitmapOfRunContainers = buf;
- buf += s;
- }
- uint16_t *keyscards = (uint16_t *)buf;
-
- *readbytes += size * 2 * sizeof(uint16_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
- return false;
- }
- buf += size * 2 * sizeof(uint16_t);
-
- bool is_ok = ra_init_with_capacity(answer, size);
- if (!is_ok) {
- fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
- return false;
- }
-
- int32_t k; for (k = 0; k < size; ++k) {
- uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k, sizeof(tmp));
- answer->keys[k] = tmp;
- }
- if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
- *readbytes += size * 4;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Ran out of bytes while reading offsets.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
-
- // skipping the offsets
- buf += size * 4;
- }
- // Reading the containers
- for (k = 0; k < size; ++k) {
- uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
- uint32_t thiscard = tmp + 1;
- bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
- bool isrun = false;
- if(hasrun) {
- if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
- }
- if (isbitmap) {
- // we check that the read is allowed
- size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
- *readbytes += containersize;
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- // it is now safe to read
- bitset_container_t *c = bitset_container_create();
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- answer->size++;
- buf += bitset_container_read(thiscard, c, buf);
- answer->containers[k] = c;
- answer->typecodes[k] = BITSET_CONTAINER_TYPE;
- } else if (isrun) {
- // we check that the read is allowed
- *readbytes += sizeof(uint16_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- uint16_t n_runs;
- memcpy(&n_runs, buf, sizeof(uint16_t));
- size_t containersize = n_runs * sizeof(rle16_t);
- *readbytes += containersize;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Running out of bytes while reading a run container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- // it is now safe to read
-
- run_container_t *c = run_container_create();
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for a run container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- answer->size++;
- buf += run_container_read(thiscard, c, buf);
- answer->containers[k] = c;
- answer->typecodes[k] = RUN_CONTAINER_TYPE;
- } else {
- // we check that the read is allowed
- size_t containersize = thiscard * sizeof(uint16_t);
- *readbytes += containersize;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Running out of bytes while reading an array container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- // it is now safe to read
- array_container_t *c =
- array_container_create_given_capacity(thiscard);
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for an array container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
- }
- answer->size++;
- buf += array_container_read(thiscard, c, buf);
- answer->containers[k] = c;
- answer->typecodes[k] = ARRAY_CONTAINER_TYPE;
- }
- }
- return true;
-}
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif
-/* end file src/roaring_array.c */
-/* begin file src/roaring_priority_queue.c */
-
-
-#ifdef __cplusplus
-using namespace ::roaring::internal;
-
-extern "C" { namespace roaring { namespace api {
-#endif
-
-struct roaring_pq_element_s {
- uint64_t size;
- bool is_temporary;
- roaring_bitmap_t *bitmap;
-};
-
-typedef struct roaring_pq_element_s roaring_pq_element_t;
-
-struct roaring_pq_s {
- roaring_pq_element_t *elements;
- uint64_t size;
-};
-
-typedef struct roaring_pq_s roaring_pq_t;
-
-static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) {
- return t1->size < t2->size;
-}
-
-static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) {
- uint64_t i = pq->size;
- pq->elements[pq->size++] = *t;
- while (i > 0) {
- uint64_t p = (i - 1) >> 1;
- roaring_pq_element_t ap = pq->elements[p];
- if (!compare(t, &ap)) break;
- pq->elements[i] = ap;
- i = p;
- }
- pq->elements[i] = *t;
-}
-
-static void pq_free(roaring_pq_t *pq) {
- ndpi_free(pq);
-}
-
-static void percolate_down(roaring_pq_t *pq, uint32_t i) {
- uint32_t size = (uint32_t)pq->size;
- uint32_t hsize = size >> 1;
- roaring_pq_element_t ai = pq->elements[i];
- while (i < hsize) {
- uint32_t l = (i << 1) + 1;
- uint32_t r = l + 1;
- roaring_pq_element_t bestc = pq->elements[l];
- if (r < size) {
- if (compare(pq->elements + r, &bestc)) {
- l = r;
- bestc = pq->elements[r];
- }
- }
- if (!compare(&bestc, &ai)) {
- break;
- }
- pq->elements[i] = bestc;
- i = l;
- }
- pq->elements[i] = ai;
-}
-
-static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) {
- size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length;
- roaring_pq_t *answer = (roaring_pq_t *)ndpi_malloc(alloc_size);
- answer->elements = (roaring_pq_element_t *)(answer + 1);
- answer->size = length;
- uint32_t i; for (i = 0; i < length; i++) {
- answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i];
- answer->elements[i].is_temporary = false;
- answer->elements[i].size =
- roaring_bitmap_portable_size_in_bytes(arr[i]);
- }
- {
- int32_t i;
- for (i = (length >> 1); i >= 0; i--) {
- percolate_down(answer, i);
- }
- }
- return answer;
-}
-
-static roaring_pq_element_t pq_poll(roaring_pq_t *pq) {
- roaring_pq_element_t ans = *pq->elements;
- if (pq->size > 1) {
- pq->elements[0] = pq->elements[--pq->size];
- percolate_down(pq, 0);
- } else
- --pq->size;
- // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size;
- return ans;
-}
-
-// this function consumes and frees the inputs
-static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
- roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = ra_get_size(&x1->high_low_container),
- length2 = ra_get_size(&x2->high_low_container);
- if (0 == length1) {
- roaring_bitmap_free(x1);
- return x2;
- }
- if (0 == length2) {
- roaring_bitmap_free(x2);
- return x1;
- }
- uint32_t neededcap = length1 > length2 ? length2 : length1;
- roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
-            // todo: unsharing can be inefficient as it may create a clone
-            // where none is needed, but it has the benefit of being easy to
-            // reason about.
-
- ra_unshare_container_at_index(&x1->high_low_container, pos1);
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- assert(type1 != SHARED_CONTAINER_TYPE);
-
- ra_unshare_container_at_index(&x2->high_low_container, pos2);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- assert(type2 != SHARED_CONTAINER_TYPE);
-
- container_t *c;
-
- if ((type2 == BITSET_CONTAINER_TYPE) &&
- (type1 != BITSET_CONTAINER_TYPE)
- ){
- c = container_lazy_ior(c2, type2, c1, type1, &result_type);
- container_free(c1, type1);
- if (c != c2) {
- container_free(c2, type2);
- }
- } else {
- c = container_lazy_ior(c1, type1, c2, type2, &result_type);
- container_free(c2, type2);
- if (c != c1) {
- container_free(c1, type1);
- }
- }
-            // since we assume that the initial containers are non-empty,
-            // the result here can only be non-empty
- ra_append(&answer->high_low_container, s1, c, result_type);
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- ra_append(&answer->high_low_container, s1, c1, type1);
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- ra_append(&answer->high_low_container, s2, c2, type2);
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_move_range(&answer->high_low_container,
- &x2->high_low_container, pos2, length2);
- } else if (pos2 == length2) {
- ra_append_move_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1);
- }
- ra_clear_without_containers(&x1->high_low_container);
- ra_clear_without_containers(&x2->high_low_container);
- ndpi_free(x1);
- ndpi_free(x2);
- return answer;
-}
-
-/**
- * Compute the union of 'number' bitmaps using a heap. This can
- * sometimes be faster than roaring_bitmap_or_many which uses
- * a naive algorithm. Caller is responsible for freeing the
- * result.
- */
-roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
- const roaring_bitmap_t **x) {
- if (number == 0) {
- return roaring_bitmap_create();
- }
- if (number == 1) {
- return roaring_bitmap_copy(x[0]);
- }
- roaring_pq_t *pq = create_pq(x, number);
- while (pq->size > 1) {
- roaring_pq_element_t x1 = pq_poll(pq);
- roaring_pq_element_t x2 = pq_poll(pq);
-
- if (x1.is_temporary && x2.is_temporary) {
- roaring_bitmap_t *newb =
- lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap);
- // should normally return a fresh new bitmap *except* that
- // it can return x1.bitmap or x2.bitmap in degenerate cases
- bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap));
- uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
- roaring_pq_element_t newelement = {
- .size = bsize, .is_temporary = temporary, .bitmap = newb};
- pq_add(pq, &newelement);
- } else if (x2.is_temporary) {
- roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false);
- x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap);
- pq_add(pq, &x2);
- } else if (x1.is_temporary) {
- roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false);
- x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap);
-
- pq_add(pq, &x1);
- } else {
- roaring_bitmap_t *newb =
- roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false);
- uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
- roaring_pq_element_t newelement = {
- .size = bsize, .is_temporary = true, .bitmap = newb};
-
- pq_add(pq, &newelement);
- }
- }
- roaring_pq_element_t X = pq_poll(pq);
- roaring_bitmap_t *answer = X.bitmap;
- roaring_bitmap_repair_after_lazy(answer);
- pq_free(pq);
- return answer;
-}
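-
-// Illustrative call sequence (a sketch only: in this nDPI amalgamation most
-// helpers are static, and the names below follow the public CRoaring API;
-// a, b and c stand for existing bitmaps):
-//
-//   const roaring_bitmap_t *inputs[] = { a, b, c };
-//   roaring_bitmap_t *u = roaring_bitmap_or_many_heap(3, inputs);
-//   /* ... use u ... */
-//   roaring_bitmap_free(u);   // the caller owns the result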
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace api {
-#endif
-/* end file src/roaring_priority_queue.c */
-/* begin file src/roaring.c */
-#include <assert.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-
-
-
-#ifdef __cplusplus
-using namespace ::roaring::internal;
-
-extern "C" { namespace roaring { namespace api {
-#endif
-
-extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r,
- uint32_t val);
-extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
-extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
-
-static inline bool is_cow(const roaring_bitmap_t *r) {
- return r->high_low_container.flags & ROARING_FLAG_COW;
-}
-static inline bool is_frozen(const roaring_bitmap_t *r) {
- return r->high_low_container.flags & ROARING_FLAG_FROZEN;
-}
-
-// This is like roaring_bitmap_add, but it populates pointer arguments in such
-// a way that we can recover the container touched, which, in turn, can be
-// used to accelerate some functions (when you repeatedly need to add to the
-// same container).
-static inline container_t *containerptr_roaring_bitmap_add(
- roaring_bitmap_t *r, uint32_t val,
- uint8_t *type, int *index
-){
- roaring_array_t *ra = &r->high_low_container;
-
- uint16_t hb = val >> 16;
- const int i = ra_get_index(ra, hb);
- if (i >= 0) {
- ra_unshare_container_at_index(ra, i);
- container_t *c = ra_get_container_at_index(ra, i, type);
- uint8_t new_type = *type;
- container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
- *index = i;
- if (c2 != c) {
- container_free(c, *type);
- ra_set_container_at_index(ra, i, c2, new_type);
- *type = new_type;
- return c2;
- } else {
- return c;
- }
- } else {
- array_container_t *new_ac = array_container_create();
- container_t *c = container_add(new_ac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, type);
- // we could just assume that it stays an array container
- ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
- *index = -i - 1;
- return c;
- }
-}
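-
-// Example of the 16/16 split used above: for val = 0x00012345 the high
-// 16 bits (hb = 0x0001) select the container, and the low 16 bits
-// (val & 0xFFFF = 0x2345) are the value stored inside that container.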
-
-static roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
- roaring_bitmap_t *ans =
- (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t));
- if (!ans) {
- return NULL;
- }
- bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
- if (!is_ok) {
- ndpi_free(ans);
- return NULL;
- }
- return ans;
-}
-
-static bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
- return ra_init_with_capacity(&r->high_low_container, cap);
-}
-
-
-static void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
- const uint32_t *vals) {
- container_t *container = NULL; // hold value of last container touched
- uint8_t typecode = 0; // typecode of last container touched
-    uint32_t prev = 0; // previous value inserted
- size_t i = 0; // index of value
- int containerindex = 0;
- if (n_args == 0) return;
- uint32_t val;
- memcpy(&val, vals + i, sizeof(val));
- container =
- containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
- prev = val;
- i++;
- for (; i < n_args; i++) {
- memcpy(&val, vals + i, sizeof(val));
-        if (((prev ^ val) >> 16) == 0) {
-            // no need to seek the container, it is at hand: because we
-            // already have the container at hand, we can do the insertion
-            // automatically, bypassing the roaring_bitmap_add call
- uint8_t newtypecode = typecode;
- container_t *container2 =
- container_add(container, val & 0xFFFF, typecode, &newtypecode);
- if (container2 != container) { // rare instance when we need to
- // change the container type
- container_free(container, typecode);
- ra_set_container_at_index(&r->high_low_container,
- containerindex, container2,
- newtypecode);
- typecode = newtypecode;
- container = container2;
- }
- } else {
- container = containerptr_roaring_bitmap_add(r, val, &typecode,
- &containerindex);
- }
- prev = val;
- }
-}
-
-static roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
- roaring_bitmap_t *answer = roaring_bitmap_create();
- roaring_bitmap_add_many(answer, n_args, vals);
- return answer;
-}
-
-static roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
- // todo: could be greatly optimized but we do not expect this call to ever
- // include long lists
- roaring_bitmap_t *answer = roaring_bitmap_create();
- va_list ap;
- va_start(ap, n_args);
- size_t i; for (i = 1; i <= n_args; i++) {
- uint32_t val = va_arg(ap, uint32_t);
- roaring_bitmap_add(answer, val);
- }
- va_end(ap);
- return answer;
-}
-
-static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
- return (a < b) ? a : b;
-}
-
-static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
- return (a < b) ? a : b;
-}
-
-static roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
- uint32_t step) {
- if(max >= UINT64_C(0x100000000)) {
- max = UINT64_C(0x100000000);
- }
- if (step == 0) return NULL;
- if (max <= min) return NULL;
- roaring_bitmap_t *answer = roaring_bitmap_create();
- if (step >= (1 << 16)) {
- uint32_t value; for (value = (uint32_t)min; value < max; value += step) {
- roaring_bitmap_add(answer, value);
- }
- return answer;
- }
- uint64_t min_tmp = min;
- do {
- uint32_t key = (uint32_t)min_tmp >> 16;
- uint32_t container_min = min_tmp & 0xFFFF;
- uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
- uint8_t type;
- container_t *container = container_from_range(&type, container_min,
- container_max, (uint16_t)step);
- ra_append(&answer->high_low_container, key, container, type);
- uint32_t gap = container_max - container_min + step - 1;
- min_tmp += gap - (gap % step);
- } while (min_tmp < max);
- // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
- return answer;
-}
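-
-// Example of the resulting cardinality: roaring_bitmap_from_range(0, 10, 3)
-// produces {0, 3, 6, 9} (max is exclusive), i.e.
-// ((uint64_t)10 - 0 + 3 - 1) / 3 = 4 values, matching the formula in the
-// comment above.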
-
-static void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
- if (min > max) {
- return;
- }
-
- roaring_array_t *ra = &r->high_low_container;
-
- uint32_t min_key = min >> 16;
- uint32_t max_key = max >> 16;
-
- int32_t num_required_containers = max_key - min_key + 1;
- int32_t suffix_length = count_greater(ra->keys, ra->size, max_key);
- int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length,
- min_key);
- int32_t common_length = ra->size - prefix_length - suffix_length;
-
- if (num_required_containers > common_length) {
- ra_shift_tail(ra, suffix_length,
- num_required_containers - common_length);
- }
-
- int32_t src = prefix_length + common_length - 1;
- int32_t dst = ra->size - suffix_length - 1;
- uint32_t key; for ( key = max_key; key != min_key-1; key--) { // beware of min_key==0
- uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
- uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
- container_t* new_container;
- uint8_t new_type;
-
- if (src >= 0 && ra->keys[src] == key) {
- ra_unshare_container_at_index(ra, src);
- new_container = container_add_range(ra->containers[src],
- ra->typecodes[src],
- container_min, container_max,
- &new_type);
- if (new_container != ra->containers[src]) {
- container_free(ra->containers[src],
- ra->typecodes[src]);
- }
- src--;
- } else {
- new_container = container_from_range(&new_type, container_min,
- container_max+1, 1);
- }
- ra_replace_key_and_container_at_index(ra, dst, key, new_container,
- new_type);
- dst--;
- }
-}
-
-static void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
- if (min > max) {
- return;
- }
-
- roaring_array_t *ra = &r->high_low_container;
-
- uint32_t min_key = min >> 16;
- uint32_t max_key = max >> 16;
-
- int32_t src = count_less(ra->keys, ra->size, min_key);
- int32_t dst = src;
- while (src < ra->size && ra->keys[src] <= max_key) {
- uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
- uint32_t container_max = (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
- ra_unshare_container_at_index(ra, src);
- container_t *new_container;
- uint8_t new_type;
- new_container = container_remove_range(ra->containers[src],
- ra->typecodes[src],
- container_min, container_max,
- &new_type);
- if (new_container != ra->containers[src]) {
- container_free(ra->containers[src],
- ra->typecodes[src]);
- }
- if (new_container) {
- ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
- new_container, new_type);
- dst++;
- }
- src++;
- }
- if (src > dst) {
- ra_shift_tail(ra, ra->size - src, dst - src);
- }
-}
-
-extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-
-static void roaring_bitmap_printf(const roaring_bitmap_t *r) {
- const roaring_array_t *ra = &r->high_low_container;
-
- printf("{");
- int i = 0; for (i = 0; i < ra->size; ++i) {
- container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
-
- if (i + 1 < ra->size) {
- printf(",");
- }
- }
- printf("}");
-}
-
-static void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
- const roaring_array_t *ra = &r->high_low_container;
-
- printf("{");
- int i = 0; for (i = 0; i < ra->size; ++i) {
- printf("%d: %s (%d)", ra->keys[i],
- get_full_container_name(ra->containers[i], ra->typecodes[i]),
- container_get_cardinality(ra->containers[i], ra->typecodes[i]));
- if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
- printf(
- "(shared count = %" PRIu32 " )",
- CAST_shared(ra->containers[i])->counter);
- }
-
- if (i + 1 < ra->size) {
- printf(", ");
- }
- }
- printf("}");
-}
-
-typedef struct min_max_sum_s {
- uint32_t min;
- uint32_t max;
- uint64_t sum;
-} min_max_sum_t;
-
-static bool min_max_sum_fnc(uint32_t value, void *param) {
- min_max_sum_t *mms = (min_max_sum_t *)param;
- if (value > mms->max) mms->max = value;
- if (value < mms->min) mms->min = value;
- mms->sum += value;
- return true; // we always process all data points
-}
-
-/**
-* (For advanced users.)
-* Collect statistics about the bitmap
-*/
-static void roaring_bitmap_statistics(const roaring_bitmap_t *r,
- roaring_statistics_t *stat) {
- const roaring_array_t *ra = &r->high_low_container;
-
- memset(stat, 0, sizeof(*stat));
- stat->n_containers = ra->size;
- stat->cardinality = roaring_bitmap_get_cardinality(r);
- min_max_sum_t mms;
- mms.min = UINT32_C(0xFFFFFFFF);
- mms.max = UINT32_C(0);
- mms.sum = 0;
- roaring_iterate(r, &min_max_sum_fnc, &mms);
- stat->min_value = mms.min;
- stat->max_value = mms.max;
- stat->sum_value = mms.sum;
-
- int i = 0; for (i = 0; i < ra->size; ++i) {
- uint8_t truetype =
- get_container_type(ra->containers[i], ra->typecodes[i]);
- uint32_t card =
- container_get_cardinality(ra->containers[i], ra->typecodes[i]);
- uint32_t sbytes =
- container_size_in_bytes(ra->containers[i], ra->typecodes[i]);
- switch (truetype) {
- case BITSET_CONTAINER_TYPE:
- stat->n_bitset_containers++;
- stat->n_values_bitset_containers += card;
- stat->n_bytes_bitset_containers += sbytes;
- break;
- case ARRAY_CONTAINER_TYPE:
- stat->n_array_containers++;
- stat->n_values_array_containers += card;
- stat->n_bytes_array_containers += sbytes;
- break;
- case RUN_CONTAINER_TYPE:
- stat->n_run_containers++;
- stat->n_values_run_containers += card;
- stat->n_bytes_run_containers += sbytes;
- break;
- default:
- assert(false);
- __builtin_unreachable();
- }
- }
-}
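-
-// Illustrative use (a sketch; roaring_statistics_t comes from the public
-// header):
-//
-//   roaring_statistics_t st;
-//   roaring_bitmap_statistics(r, &st);
-//   // st.n_containers, st.cardinality, st.min_value, st.max_value,
-//   // st.sum_value and the per-container-type counters are now filled in.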
-
-static roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
- roaring_bitmap_t *ans =
- (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t));
- if (!ans) {
- return NULL;
- }
- if (!ra_init_with_capacity( // allocation of list of containers can fail
- &ans->high_low_container, r->high_low_container.size)
- ){
- ndpi_free(ans);
- return NULL;
- }
- if (!ra_overwrite( // memory allocation of individual containers may fail
- &r->high_low_container, &ans->high_low_container, is_cow(r))
- ){
- roaring_bitmap_free(ans); // overwrite should leave in freeable state
- return NULL;
- }
- roaring_bitmap_set_copy_on_write(ans, is_cow(r));
- return ans;
-}
-
-static bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
- const roaring_bitmap_t *src) {
- roaring_bitmap_set_copy_on_write(dest, is_cow(src));
- return ra_overwrite(&src->high_low_container, &dest->high_low_container,
- is_cow(src));
-}
-
-static void roaring_bitmap_free(const roaring_bitmap_t *r) {
- if (!is_frozen(r)) {
- ra_clear((roaring_array_t*)&r->high_low_container);
- }
- ndpi_free((roaring_bitmap_t*)r);
-}
-
-static void roaring_bitmap_clear(roaring_bitmap_t *r) {
- ra_reset(&r->high_low_container);
-}
-
-static void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
- roaring_array_t *ra = &r->high_low_container;
-
- const uint16_t hb = val >> 16;
- const int i = ra_get_index(ra, hb);
- uint8_t typecode;
- if (i >= 0) {
- ra_unshare_container_at_index(ra, i);
- container_t *container =
- ra_get_container_at_index(ra, i, &typecode);
- uint8_t newtypecode = typecode;
- container_t *container2 =
- container_add(container, val & 0xFFFF, typecode, &newtypecode);
- if (container2 != container) {
- container_free(container, typecode);
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- }
- } else {
- array_container_t *newac = array_container_create();
- container_t *container = container_add(newac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, &typecode);
- // we could just assume that it stays an array container
- ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
- container, typecode);
- }
-}
-
-static bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
- const uint16_t hb = val >> 16;
- const int i = ra_get_index(&r->high_low_container, hb);
- uint8_t typecode;
- bool result = false;
- if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
-
- const int oldCardinality =
- container_get_cardinality(container, typecode);
-
- uint8_t newtypecode = typecode;
- container_t *container2 =
- container_add(container, val & 0xFFFF, typecode, &newtypecode);
- if (container2 != container) {
- container_free(container, typecode);
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- result = true;
- } else {
- const int newCardinality =
- container_get_cardinality(container, newtypecode);
-
- result = oldCardinality != newCardinality;
- }
- } else {
- array_container_t *newac = array_container_create();
- container_t *container = container_add(newac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, &typecode);
- // we could just assume that it stays an array container
- ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
- container, typecode);
- result = true;
- }
-
- return result;
-}
-
-static void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
- const uint16_t hb = val >> 16;
- const int i = ra_get_index(&r->high_low_container, hb);
- uint8_t typecode;
- if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
- uint8_t newtypecode = typecode;
- container_t *container2 =
- container_remove(container, val & 0xFFFF, typecode, &newtypecode);
- if (container2 != container) {
- container_free(container, typecode);
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- }
- if (container_get_cardinality(container2, newtypecode) != 0) {
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- } else {
- ra_remove_at_index_and_free(&r->high_low_container, i);
- }
- }
-}
-
-static bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
- const uint16_t hb = val >> 16;
- const int i = ra_get_index(&r->high_low_container, hb);
- uint8_t typecode;
- bool result = false;
- if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
-
- const int oldCardinality =
- container_get_cardinality(container, typecode);
-
- uint8_t newtypecode = typecode;
- container_t *container2 =
- container_remove(container, val & 0xFFFF, typecode, &newtypecode);
- if (container2 != container) {
- container_free(container, typecode);
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- }
-
- const int newCardinality =
- container_get_cardinality(container2, newtypecode);
-
- if (newCardinality != 0) {
- ra_set_container_at_index(&r->high_low_container, i, container2,
- newtypecode);
- } else {
- ra_remove_at_index_and_free(&r->high_low_container, i);
- }
-
- result = oldCardinality != newCardinality;
- }
- return result;
-}
-
-static void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
- const uint32_t *vals) {
- if (n_args == 0 || r->high_low_container.size == 0) {
- return;
- }
- int32_t pos = -1; // position of the container used in the previous iteration
- size_t i; for (i = 0; i < n_args; i++) {
- uint16_t key = (uint16_t)(vals[i] >> 16);
- if (pos < 0 || key != r->high_low_container.keys[pos]) {
- pos = ra_get_index(&r->high_low_container, key);
- }
- if (pos >= 0) {
- uint8_t new_typecode;
- container_t *new_container;
- new_container = container_remove(r->high_low_container.containers[pos],
- vals[i] & 0xffff,
- r->high_low_container.typecodes[pos],
- &new_typecode);
- if (new_container != r->high_low_container.containers[pos]) {
- container_free(r->high_low_container.containers[pos],
- r->high_low_container.typecodes[pos]);
- ra_replace_key_and_container_at_index(&r->high_low_container,
- pos, key, new_container,
- new_typecode);
- }
- if (!container_nonzero_cardinality(new_container, new_typecode)) {
- container_free(new_container, new_typecode);
- ra_remove_at_index(&r->high_low_container, pos);
- pos = -1;
- }
- }
- }
-}
-
-// there should be some SIMD optimizations possible here
-static roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- uint32_t neededcap = length1 > length2 ? length2 : length1;
- roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
-
- int pos1 = 0, pos2 = 0;
-
- while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- if (s1 == s2) {
- uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_and(c1, type1, c2, type2, &result_type);
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_append(&answer->high_low_container, s1, c, result_type);
- } else {
- container_free(c, result_type); // otherwise: memory leak!
- }
- ++pos1;
- ++pos2;
- } else if (s1 < s2) { // s1 < s2
- pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
- } else { // s1 > s2
- pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
- }
- }
- return answer;
-}
-
-/**
- * Compute the union of 'number' bitmaps.
- */
-static roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
- const roaring_bitmap_t **x) {
- if (number == 0) {
- return roaring_bitmap_create();
- }
- if (number == 1) {
- return roaring_bitmap_copy(x[0]);
- }
- roaring_bitmap_t *answer =
- roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
- size_t i; for (i = 2; i < number; i++) {
- roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
- }
- roaring_bitmap_repair_after_lazy(answer);
- return answer;
-}
-
-/**
- * Compute the xor of 'number' bitmaps.
- */
-static roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
- const roaring_bitmap_t **x) {
- if (number == 0) {
- return roaring_bitmap_create();
- }
- if (number == 1) {
- return roaring_bitmap_copy(x[0]);
- }
- roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
- size_t i; for (i = 2; i < number; i++) {
- roaring_bitmap_lazy_xor_inplace(answer, x[i]);
- }
- roaring_bitmap_repair_after_lazy(answer);
- return answer;
-}
-
-// inplace and (modifies its first argument).
-static void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- if (x1 == x2) return;
- int pos1 = 0, pos2 = 0, intersection_size = 0;
- const int length1 = ra_get_size(&x1->high_low_container);
- const int length2 = ra_get_size(&x2->high_low_container);
-
- // any skipped-over or newly emptied containers in x1
- // have to be freed.
- while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- if (s1 == s2) {
- uint8_t type1, type2, result_type;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
-
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
- container_t *c =
- (type1 == SHARED_CONTAINER_TYPE)
- ? container_and(c1, type1, c2, type2, &result_type)
- : container_iand(c1, type1, c2, type2, &result_type);
-
- if (c != c1) { // in this instance a new container was created, and
- // we need to free the old one
- container_free(c1, type1);
- }
- if (container_nonzero_cardinality(c, result_type)) {
- ra_replace_key_and_container_at_index(&x1->high_low_container,
- intersection_size, s1, c,
- result_type);
- intersection_size++;
- } else {
- container_free(c, result_type);
- }
- ++pos1;
- ++pos2;
- } else if (s1 < s2) {
- pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
- } else { // s1 > s2
- pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
- }
- }
-
- // if we ended early because x2 ran out, then all remaining in x1 should be
- // freed
- while (pos1 < length1) {
- container_free(x1->high_low_container.containers[pos1],
- x1->high_low_container.typecodes[pos1]);
- ++pos1;
- }
-
- // all containers after this have either been copied or freed
- ra_downsize(&x1->high_low_container, intersection_size);
-}
-
-static roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- if (0 == length1) {
- return roaring_bitmap_copy(x2);
- }
- if (0 == length2) {
- return roaring_bitmap_copy(x1);
- }
- roaring_bitmap_t *answer =
- roaring_bitmap_create_with_capacity(length1 + length2);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_or(c1, type1, c2, type2, &result_type);
-
-            // since we assume that the initial containers are non-empty,
-            // the result here can only be non-empty
- ra_append(&answer->high_low_container, s1, c, result_type);
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- // c1 = container_clone(c1, type1);
- c1 = get_copy_of_container(c1, &type1, is_cow(x1));
- if (is_cow(x1)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c1,
- type1);
- }
- ra_append(&answer->high_low_container, s1, c1, type1);
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- // c2 = container_clone(c2, type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_append(&answer->high_low_container, s2, c2, type2);
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&answer->high_low_container,
- &x2->high_low_container, pos2, length2,
- is_cow(x2));
- } else if (pos2 == length2) {
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1,
- is_cow(x1));
- }
- return answer;
-}
-
-// inplace or (modifies its first argument).
-static void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- int length1 = x1->high_low_container.size;
- const int length2 = x2->high_low_container.size;
-
- if (0 == length2) return;
-
- if (0 == length1) {
- roaring_bitmap_overwrite(x1, x2);
- return;
- }
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- if (!container_is_full(c1, type1)) {
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c =
- (type1 == SHARED_CONTAINER_TYPE)
- ? container_or(c1, type1, c2, type2, &result_type)
- : container_ior(c1, type1, c2, type2, &result_type);
-
- if (c != c1) { // in this instance a new container was created,
- // and we need to free the old one
- container_free(c1, type1);
- }
- ra_set_container_at_index(&x1->high_low_container, pos1, c,
- result_type);
- }
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
- pos2, &type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
-
- // container_t *c2_clone = container_clone(c2, type2);
- ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
- type2);
- pos1++;
- length1++;
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
- pos2, length2, is_cow(x2));
- }
-}
-
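A minimal usage sketch of the merge-by-key union above, assuming the usual CRoaring entry points compiled into this amalgamation; roaring_bitmap_free() does not appear in this hunk and is assumed to be available elsewhere in the file.

static void example_union(void) {
    /* two small sets; 100000 and 200000 fall under different 16-bit keys,
       so the merge walks several containers */
    uint32_t a_vals[] = {1, 2, 100000};
    uint32_t b_vals[] = {2, 3, 200000};
    roaring_bitmap_t *a = roaring_bitmap_of_ptr(3, a_vals);
    roaring_bitmap_t *b = roaring_bitmap_of_ptr(3, b_vals);

    roaring_bitmap_t *u = roaring_bitmap_or(a, b);   /* {1, 2, 3, 100000, 200000} */
    uint64_t n = roaring_bitmap_get_cardinality(u);  /* 5 */
    (void)n;

    roaring_bitmap_free(u);  /* assumed available; not part of this hunk */
    roaring_bitmap_free(a);
    roaring_bitmap_free(b);
}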
-static roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- if (0 == length1) {
- return roaring_bitmap_copy(x2);
- }
- if (0 == length2) {
- return roaring_bitmap_copy(x1);
- }
- roaring_bitmap_t *answer =
- roaring_bitmap_create_with_capacity(length1 + length2);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_xor(c1, type1, c2, type2, &result_type);
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_append(&answer->high_low_container, s1, c, result_type);
- } else {
- container_free(c, result_type);
- }
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- c1 = get_copy_of_container(c1, &type1, is_cow(x1));
- if (is_cow(x1)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c1,
- type1);
- }
- ra_append(&answer->high_low_container, s1, c1, type1);
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_append(&answer->high_low_container, s2, c2, type2);
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&answer->high_low_container,
- &x2->high_low_container, pos2, length2,
- is_cow(x2));
- } else if (pos2 == length2) {
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1,
- is_cow(x1));
- }
- return answer;
-}
-
-// inplace xor (modifies its first argument).
-
-static void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- assert(x1 != x2);
- uint8_t result_type = 0;
- int length1 = x1->high_low_container.size;
- const int length2 = x2->high_low_container.size;
-
- if (0 == length2) return;
-
- if (0 == length1) {
- roaring_bitmap_overwrite(x1, x2);
- return;
- }
-
- // XOR can have new containers inserted from x2, but can also
- // lose containers when x1 and x2 are nonempty and identical.
-
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
-
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
-
- container_t *c;
- if (type1 == SHARED_CONTAINER_TYPE) {
- c = container_xor(c1, type1, c2, type2, &result_type);
- shared_container_free(CAST_shared(c1)); // so release
- }
- else {
- c = container_ixor(c1, type1, c2, type2, &result_type);
- }
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c,
- result_type);
- ++pos1;
- } else {
- container_free(c, result_type);
- ra_remove_at_index(&x1->high_low_container, pos1);
- --length1;
- }
-
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
-
- ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
- type2);
- pos1++;
- length1++;
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
- pos2, length2, is_cow(x2));
- }
-}
-
-static roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- if (0 == length1) {
- roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
- roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2));
- return empty_bitmap;
- }
- if (0 == length2) {
- return roaring_bitmap_copy(x1);
- }
- roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
-
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = 0;
- uint16_t s2 = 0;
- while (true) {
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_andnot(c1, type1, c2, type2,
- &result_type);
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_append(&answer->high_low_container, s1, c, result_type);
- } else {
- container_free(c, result_type);
- }
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- } else if (s1 < s2) { // s1 < s2
- const int next_pos1 =
- ra_advance_until(&x1->high_low_container, s2, pos1);
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, next_pos1,
- is_cow(x1));
- // TODO : perhaps some of the copy_on_write should be based on
- // answer rather than x1 (more stringent?). Many similar cases
- pos1 = next_pos1;
- if (pos1 == length1) break;
- } else { // s1 > s2
- pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
- if (pos2 == length2) break;
- }
- }
- if (pos2 == length2) {
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1,
- is_cow(x1));
- }
- return answer;
-}
-
-// inplace andnot (modifies its first argument).
-
-static void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- assert(x1 != x2);
-
- uint8_t result_type = 0;
- int length1 = x1->high_low_container.size;
- const int length2 = x2->high_low_container.size;
- int intersection_size = 0;
-
- if (0 == length2) return;
-
- if (0 == length1) {
- roaring_bitmap_clear(x1);
- return;
- }
-
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
-
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
-
- container_t *c;
- if (type1 == SHARED_CONTAINER_TYPE) {
- c = container_andnot(c1, type1, c2, type2, &result_type);
- shared_container_free(CAST_shared(c1)); // release
- }
- else {
- c = container_iandnot(c1, type1, c2, type2, &result_type);
- }
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_replace_key_and_container_at_index(&x1->high_low_container,
- intersection_size++, s1,
- c, result_type);
- } else {
- container_free(c, result_type);
- }
-
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- if (pos1 != intersection_size) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
-
- ra_replace_key_and_container_at_index(&x1->high_low_container,
- intersection_size, s1, c1,
- type1);
- }
- intersection_size++;
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
-
- if (pos1 < length1) {
- // all containers between intersection_size and
- // pos1 are junk. However, they have either been moved
- // (thus still referenced) or involved in an iandnot
- // that will clean up all containers that could not be reused.
- // Thus we should not free the junk containers between
- // intersection_size and pos1.
- if (pos1 > intersection_size) {
- // left slide of remaining items
- ra_copy_range(&x1->high_low_container, pos1, length1,
- intersection_size);
- }
- // else current placement is fine
- intersection_size += (length1 - pos1);
- }
- ra_downsize(&x1->high_low_container, intersection_size);
-}
-
-static uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
- const roaring_array_t *ra = &r->high_low_container;
-
- uint64_t card = 0;
- int i = 0; for (i = 0; i < ra->size; ++i)
- card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
- return card;
-}
-
-static uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
- uint64_t range_start,
- uint64_t range_end) {
- const roaring_array_t *ra = &r->high_low_container;
-
- if (range_end > UINT32_MAX) {
- range_end = UINT32_MAX + UINT64_C(1);
- }
- if (range_start >= range_end) {
- return 0;
- }
- range_end--; // make range_end inclusive
- // now we have: 0 <= range_start <= range_end <= UINT32_MAX
-
- uint16_t minhb = range_start >> 16;
- uint16_t maxhb = range_end >> 16;
-
- uint64_t card = 0;
-
- int i = ra_get_index(ra, minhb);
- if (i >= 0) {
- if (minhb == maxhb) {
- card += container_rank(ra->containers[i], ra->typecodes[i],
- range_end & 0xffff);
- } else {
- card += container_get_cardinality(ra->containers[i],
- ra->typecodes[i]);
- }
- if ((range_start & 0xffff) != 0) {
- card -= container_rank(ra->containers[i], ra->typecodes[i],
- (range_start & 0xffff) - 1);
- }
- i++;
- } else {
- i = -i - 1;
- }
-
- for (; i < ra->size; i++) {
- uint16_t key = ra->keys[i];
- if (key < maxhb) {
- card += container_get_cardinality(ra->containers[i],
- ra->typecodes[i]);
- } else if (key == maxhb) {
- card += container_rank(ra->containers[i], ra->typecodes[i],
- range_end & 0xffff);
- break;
- } else {
- break;
- }
- }
-
- return card;
-}
-
-
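A small sketch of the half-open range semantics above, using only functions defined in this file: the range end is clamped to 2^32, so counting the full range matches the plain cardinality.

static bool example_full_range_matches_cardinality(const roaring_bitmap_t *r) {
    /* [0, 2^32) covers every possible uint32_t value */
    uint64_t all = roaring_bitmap_range_cardinality(r, 0, UINT64_C(0x100000000));
    return all == roaring_bitmap_get_cardinality(r);
}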
-static bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
- return r->high_low_container.size == 0;
-}
-
-static void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
- ra_to_uint32_array(&r->high_low_container, ans);
-}
-
-static bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
- size_t offset, size_t limit,
- uint32_t *ans) {
- return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
-}
-
-/** convert array and bitmap containers to run containers when it is more
- * efficient;
- * also convert from run containers when more space efficient. Returns
- * true if the result has at least one run container.
-*/
-static bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
- bool answer = false;
- int i = 0; for (i = 0; i < r->high_low_container.size; i++) {
- uint8_t type_original, type_after;
- ra_unshare_container_at_index(
- &r->high_low_container, i); // TODO: this introduces extra cloning!
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
- container_t *c1 = convert_run_optimize(c, type_original, &type_after);
- if (type_after == RUN_CONTAINER_TYPE) {
- answer = true;
- }
- ra_set_container_at_index(&r->high_low_container, i, c1, type_after);
- }
- return answer;
-}
-
-static size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
- size_t answer = 0;
- int i = 0; for (i = 0; i < r->high_low_container.size; i++) {
- uint8_t type_original;
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
- answer += container_shrink_to_fit(c, type_original);
- }
- answer += ra_shrink_to_fit(&r->high_low_container);
- return answer;
-}
-
-/**
- * Remove run-length encoding even when it is more space efficient
- * return whether a change was applied
- */
-static bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
- bool answer = false;
- int i = 0; for (i = 0; i < r->high_low_container.size; i++) {
- uint8_t type_original, type_after;
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
- if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
- answer = true;
- if (type_original == SHARED_CONTAINER_TYPE) {
- run_container_t *truec = CAST_run(CAST_shared(c)->container);
- int32_t card = run_container_cardinality(truec);
- container_t *c1 = convert_to_bitset_or_array_container(
- truec, card, &type_after);
- shared_container_free(CAST_shared(c)); // frees run as needed
- ra_set_container_at_index(&r->high_low_container, i, c1,
- type_after);
-
- } else {
- int32_t card = run_container_cardinality(CAST_run(c));
- container_t *c1 = convert_to_bitset_or_array_container(
- CAST_run(c), card, &type_after);
- run_container_free(CAST_run(c));
- ra_set_container_at_index(&r->high_low_container, i, c1,
- type_after);
- }
- }
- }
- return answer;
-}
-
-static size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
- size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
- uint64_t cardinality = roaring_bitmap_get_cardinality(r);
- uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
- if (portablesize < sizeasarray) {
- buf[0] = SERIALIZATION_CONTAINER;
- return roaring_bitmap_portable_serialize(r, buf + 1) + 1;
- } else {
- buf[0] = SERIALIZATION_ARRAY_UINT32;
- memcpy(buf + 1, &cardinality, sizeof(uint32_t));
- roaring_bitmap_to_uint32_array(
- r, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
- return 1 + (size_t)sizeasarray;
- }
-}
-
-static size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
- size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
- uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) +
- sizeof(uint32_t);
- return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
-}
-
-static size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
- return ra_portable_size_in_bytes(&r->high_low_container);
-}
-
-
-static roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
- roaring_bitmap_t *ans =
- (roaring_bitmap_t *)ndpi_malloc(sizeof(roaring_bitmap_t));
- if (ans == NULL) {
- return NULL;
- }
- size_t bytesread;
- bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
- if(is_ok) assert(bytesread <= maxbytes);
- roaring_bitmap_set_copy_on_write(ans, false);
- if (!is_ok) {
- ndpi_free(ans);
- return NULL;
- }
- return ans;
-}
-
-static roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
- return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
-}
-
-
-static size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
- return ra_portable_deserialize_size(buf, maxbytes);
-}
-
-
-static size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
- char *buf) {
- return ra_portable_serialize(&r->high_low_container, buf);
-}
-
-static roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
- const char *bufaschar = (const char *)buf;
- if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) {
- /* This looks like a compressed set of uint32_t elements */
- uint32_t card;
- memcpy(&card, bufaschar + 1, sizeof(uint32_t));
- const uint32_t *elems =
- (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
-
- return roaring_bitmap_of_ptr(card, elems);
- } else if (bufaschar[0] == SERIALIZATION_CONTAINER) {
- return roaring_bitmap_portable_deserialize(bufaschar + 1);
- } else
- return (NULL);
-}
-
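A round-trip sketch for the portable format, under the assumption that roaring_bitmap_free() behaves as in upstream CRoaring; the buffer is sized with roaring_bitmap_portable_size_in_bytes() and allocated through the same ndpi_malloc()/ndpi_free() wrappers used above.

static bool example_portable_roundtrip(const roaring_bitmap_t *r) {
    size_t len = roaring_bitmap_portable_size_in_bytes(r);
    char *buf = (char *)ndpi_malloc(len);
    if (buf == NULL) return false;

    size_t written = roaring_bitmap_portable_serialize(r, buf);
    roaring_bitmap_t *copy = roaring_bitmap_portable_deserialize_safe(buf, written);
    bool same = (copy != NULL) && roaring_bitmap_equals(r, copy);

    if (copy != NULL) roaring_bitmap_free(copy);  /* assumed available elsewhere in the file */
    ndpi_free(buf);
    return same;
}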
-static bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
- void *ptr) {
- const roaring_array_t *ra = &r->high_low_container;
-
- int i = 0; for (i = 0; i < ra->size; ++i)
- if (!container_iterate(ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16,
- iterator, ptr)) {
- return false;
- }
- return true;
-}
-
-static bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
- uint64_t high_bits, void *ptr) {
- const roaring_array_t *ra = &r->high_low_container;
-
- int i = 0; for (i = 0; i < ra->size; ++i)
- if (!container_iterate64(
- ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16, iterator,
- high_bits, ptr)) {
- return false;
- }
- return true;
-}
-
-/****
-* begin roaring_uint32_iterator_t
-*****/
-
-// Partially initializes the roaring iterator when it begins looking at
-// a new container.
-static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
- newit->in_container_index = 0;
- newit->run_index = 0;
- newit->current_value = 0;
- if (newit->container_index >= newit->parent->high_low_container.size ||
- newit->container_index < 0) {
- newit->current_value = UINT32_MAX;
- return (newit->has_value = false);
- }
- // assume not empty
- newit->has_value = true;
- // we precompute container, typecode and highbits so that successive
- // iterators do not have to grab them from odd memory locations
- // and have to worry about the (easily predicted) container_unwrap_shared
- // call.
- newit->container =
- newit->parent->high_low_container.containers[newit->container_index];
- newit->typecode =
- newit->parent->high_low_container.typecodes[newit->container_index];
- newit->highbits =
- ((uint32_t)
- newit->parent->high_low_container.keys[newit->container_index])
- << 16;
- newit->container =
- container_unwrap_shared(newit->container, &(newit->typecode));
- return newit->has_value;
-}
-
-static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
- if (!iter_new_container_partial_init(newit))
- return newit->has_value;
-
- switch (newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(newit->container);
-
- uint32_t wordindex = 0;
- uint64_t word;
- while ((word = bc->words[wordindex]) == 0) {
- wordindex++; // advance
- }
- // here "word" is non-zero
- newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- newit->current_value = newit->highbits | newit->in_container_index;
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(newit->container);
- newit->current_value = newit->highbits | ac->array[0];
- break; }
-
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(newit->container);
- newit->current_value = newit->highbits | rc->runs[0].value;
- break; }
-
- default:
- // if this ever happens, bug!
- assert(false);
- } // switch (typecode)
- return true;
-}
-
-static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
- if (!iter_new_container_partial_init(newit))
- return newit->has_value;
-
- switch(newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
- uint64_t word;
- const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
- while ((word = bitset_container->words[wordindex]) == 0)
- --wordindex;
-
- int num_leading_zeros = __builtin_clzll(word);
- newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
- newit->current_value = newit->highbits | newit->in_container_index;
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t* array_container = (const array_container_t*)newit->container;
- newit->in_container_index = array_container->cardinality - 1;
- newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
- break;
- }
- case RUN_CONTAINER_TYPE: {
- const run_container_t* run_container = (const run_container_t*)newit->container;
- newit->run_index = run_container->n_runs - 1;
- const rle16_t* last_run = &run_container->runs[newit->run_index];
- newit->current_value = newit->highbits | (last_run->value + last_run->length);
- break;
- }
- default:
- // if this ever happens, bug!
- assert(false);
- }
- return true;
-}
-
- // prerequisite: the value should be in range of the container
-static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
- // Don't have to check return value because of prerequisite
- iter_new_container_partial_init(newit);
- uint16_t lb = val & 0xFFFF;
-
- switch (newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(newit->container);
- newit->in_container_index =
- bitset_container_index_equalorlarger(bc, lb);
- newit->current_value = newit->highbits | newit->in_container_index;
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(newit->container);
- newit->in_container_index =
- array_container_index_equalorlarger(ac, lb);
- newit->current_value =
- newit->highbits | ac->array[newit->in_container_index];
- break; }
-
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(newit->container);
- newit->run_index = run_container_index_equalorlarger(rc, lb);
- if (rc->runs[newit->run_index].value <= lb) {
- newit->current_value = val;
- } else {
- newit->current_value =
- newit->highbits | rc->runs[newit->run_index].value;
- }
- break; }
-
- default:
- __builtin_unreachable();
- }
-
- return true;
-}
-
-static void roaring_init_iterator(const roaring_bitmap_t *r,
- roaring_uint32_iterator_t *newit) {
- newit->parent = r;
- newit->container_index = 0;
- newit->has_value = loadfirstvalue(newit);
-}
-
-static void roaring_init_iterator_last(const roaring_bitmap_t *r,
- roaring_uint32_iterator_t *newit) {
- newit->parent = r;
- newit->container_index = newit->parent->high_low_container.size - 1;
- newit->has_value = loadlastvalue(newit);
-}
-
-static roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) {
- roaring_uint32_iterator_t *newit =
- (roaring_uint32_iterator_t *)ndpi_malloc(sizeof(roaring_uint32_iterator_t));
- if (newit == NULL) return NULL;
- roaring_init_iterator(r, newit);
- return newit;
-}
-
-static roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
- const roaring_uint32_iterator_t *it) {
- roaring_uint32_iterator_t *newit =
- (roaring_uint32_iterator_t *)ndpi_malloc(sizeof(roaring_uint32_iterator_t));
- memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
- return newit;
-}
-
-static bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
- uint16_t hb = val >> 16;
- const int i = ra_get_index(& it->parent->high_low_container, hb);
- if (i >= 0) {
- uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
- uint16_t lb = val & 0xFFFF;
- if(lowvalue < lb ) {
- it->container_index = i+1; // will have to load first value of next container
- } else {// the value is necessarily within the range of the container
- it->container_index = i;
- it->has_value = loadfirstvalue_largeorequal(it, val);
- return it->has_value;
- }
- } else {
- // there is no matching, so we are going for the next container
- it->container_index = -i-1;
- }
- it->has_value = loadfirstvalue(it);
- return it->has_value;
-}
-
-
-static bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
- if (it->container_index >= it->parent->high_low_container.size) {
- return (it->has_value = false);
- }
- if (it->container_index < 0) {
- it->container_index = 0;
- return (it->has_value = loadfirstvalue(it));
- }
-
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(it->container);
- it->in_container_index++;
-
- uint32_t wordindex = it->in_container_index / 64;
- if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
-
- uint64_t word = bc->words[wordindex] &
- (UINT64_MAX << (it->in_container_index % 64));
- // next part could be optimized/simplified
- while ((word == 0) &&
- (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
- wordindex++;
- word = bc->words[wordindex];
- }
- if (word != 0) {
- it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- it->current_value = it->highbits | it->in_container_index;
- return (it->has_value = true);
- }
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(it->container);
- it->in_container_index++;
- if (it->in_container_index < ac->cardinality) {
- it->current_value =
- it->highbits | ac->array[it->in_container_index];
- return (it->has_value = true);
- }
- break; }
-
- case RUN_CONTAINER_TYPE: {
- if(it->current_value == UINT32_MAX) { // avoid overflow to zero
- return (it->has_value = false);
- }
-
- const run_container_t* rc = const_CAST_run(it->container);
- uint32_t limit = (it->highbits | (rc->runs[it->run_index].value +
- rc->runs[it->run_index].length));
- if (++it->current_value <= limit) {
- return (it->has_value = true);
- }
-
- if (++it->run_index < rc->n_runs) { // Assume the run has a value
- it->current_value =
- it->highbits | rc->runs[it->run_index].value;
- return (it->has_value = true);
- }
- break;
- }
-
- default:
- __builtin_unreachable();
- }
-
- // moving to next container
- it->container_index++;
- return (it->has_value = loadfirstvalue(it));
-}
-
-static bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
- if (it->container_index < 0) {
- return (it->has_value = false);
- }
- if (it->container_index >= it->parent->high_low_container.size) {
- it->container_index = it->parent->high_low_container.size - 1;
- return (it->has_value = loadlastvalue(it));
- }
-
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE: {
- if (--it->in_container_index < 0)
- break;
-
- const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
- int32_t wordindex = it->in_container_index / 64;
- uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
-
- while (word == 0 && --wordindex >= 0) {
- word = bitset_container->words[wordindex];
- }
- if (word == 0)
- break;
-
- int num_leading_zeros = __builtin_clzll(word);
- it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
- it->current_value = it->highbits | it->in_container_index;
- return (it->has_value = true);
- }
- case ARRAY_CONTAINER_TYPE: {
- if (--it->in_container_index < 0)
- break;
-
- const array_container_t* array_container = (const array_container_t*)it->container;
- it->current_value = it->highbits | array_container->array[it->in_container_index];
- return (it->has_value = true);
- }
- case RUN_CONTAINER_TYPE: {
- if(it->current_value == 0)
- return (it->has_value = false);
-
- const run_container_t* run_container = (const run_container_t*)it->container;
- if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
- return (it->has_value = true);
- }
-
- if (--it->run_index < 0)
- break;
-
- it->current_value = it->highbits | (run_container->runs[it->run_index].value +
- run_container->runs[it->run_index].length);
- return (it->has_value = true);
- }
- default:
- // if this ever happens, bug!
- assert(false);
- } // switch (typecode)
-
- // moving to previous container
- it->container_index--;
- return (it->has_value = loadlastvalue(it));
-}
-
-static uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
- uint32_t ret = 0;
- uint32_t num_values;
- uint32_t wordindex; // used for bitsets
- uint64_t word; // used for bitsets
- const array_container_t* acont; //TODO remove
- const run_container_t* rcont; //TODO remove
- const bitset_container_t* bcont; //TODO remove
-
- while (it->has_value && ret < count) {
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE:
- bcont = const_CAST_bitset(it->container);
- wordindex = it->in_container_index / 64;
- word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
- do {
- while (word != 0 && ret < count) {
- buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
- word = word & (word - 1);
- buf++;
- ret++;
- }
- while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
- wordindex++;
- word = bcont->words[wordindex];
- }
- } while (word != 0 && ret < count);
- it->has_value = (word != 0);
- if (it->has_value) {
- it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- it->current_value = it->highbits | it->in_container_index;
- }
- break;
- case ARRAY_CONTAINER_TYPE:
- acont = const_CAST_array(it->container);
- num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
- uint32_t i; for (i = 0; i < num_values; i++) {
- buf[i] = it->highbits | acont->array[it->in_container_index + i];
- }
- buf += num_values;
- ret += num_values;
- it->in_container_index += num_values;
- it->has_value = (it->in_container_index < acont->cardinality);
- if (it->has_value) {
- it->current_value = it->highbits | acont->array[it->in_container_index];
- }
- break;
- case RUN_CONTAINER_TYPE:
- rcont = const_CAST_run(it->container);
-            // "largest_run_value" below is the maximum value in the current run
- do {
- uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
- num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
- uint32_t i; for (i = 0; i < num_values; i++) {
- buf[i] = it->current_value + i;
- }
- it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
- buf += num_values;
- ret += num_values;
-
- if (it->current_value > largest_run_value || it->current_value == 0) {
- it->run_index++;
- if (it->run_index < rcont->n_runs) {
- it->current_value = it->highbits | rcont->runs[it->run_index].value;
- } else {
- it->has_value = false;
- }
- }
- } while ((ret < count) && it->has_value);
- break;
- default:
- assert(false);
- }
- if (it->has_value) {
- assert(ret == count);
- return ret;
- }
- it->container_index++;
- it->has_value = loadfirstvalue(it);
- }
- return ret;
-}
-
-
-
-static void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { ndpi_free(it); }
-
-/****
-* end of roaring_uint32_iterator_t
-*****/
-
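A short sketch of the iterator defined above: values are visited in ascending order through has_value/current_value, using only functions from this section.

static uint64_t example_sum_all_values(const roaring_bitmap_t *r) {
    uint64_t sum = 0;
    roaring_uint32_iterator_t *it = roaring_create_iterator(r);
    if (it == NULL) return 0;
    while (it->has_value) {
        sum += it->current_value;
        roaring_advance_uint32_iterator(it);  /* moves to the next value or next container */
    }
    roaring_free_uint32_iterator(it);
    return sum;
}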
-static bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
- const roaring_bitmap_t *r2) {
- const roaring_array_t *ra1 = &r1->high_low_container;
- const roaring_array_t *ra2 = &r2->high_low_container;
-
- if (ra1->size != ra2->size) {
- return false;
- }
- int i = 0; for (i = 0; i < ra1->size; ++i) {
- if (ra1->keys[i] != ra2->keys[i]) {
- return false;
- }
- }
- for (i = 0; i < ra1->size; ++i) {
- bool areequal = container_equals(ra1->containers[i],
- ra1->typecodes[i],
- ra2->containers[i],
- ra2->typecodes[i]);
- if (!areequal) {
- return false;
- }
- }
- return true;
-}
-
-static bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
- const roaring_bitmap_t *r2) {
- const roaring_array_t *ra1 = &r1->high_low_container;
- const roaring_array_t *ra2 = &r2->high_low_container;
-
- const int length1 = ra1->size,
- length2 = ra2->size;
-
- int pos1 = 0, pos2 = 0;
-
- while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(ra1, pos1);
- const uint16_t s2 = ra_get_key_at_index(ra2, pos2);
-
- if (s1 == s2) {
- uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2);
- if (!container_is_subset(c1, type1, c2, type2))
- return false;
- ++pos1;
- ++pos2;
- } else if (s1 < s2) { // s1 < s2
- return false;
- } else { // s1 > s2
- pos2 = ra_advance_until(ra2, s1, pos2);
- }
- }
- if (pos1 == length1)
- return true;
- else
- return false;
-}
-
-static void insert_flipped_container(roaring_array_t *ans_arr,
- const roaring_array_t *x1_arr, uint16_t hb,
- uint16_t lb_start, uint16_t lb_end) {
- const int i = ra_get_index(x1_arr, hb);
- const int j = ra_get_index(ans_arr, hb);
- uint8_t ctype_in, ctype_out;
- container_t *flipped_container = NULL;
- if (i >= 0) {
- container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
- flipped_container =
- container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
- (uint32_t)(lb_end + 1), &ctype_out);
-
- if (container_get_cardinality(flipped_container, ctype_out))
- ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
- ctype_out);
- else {
- container_free(flipped_container, ctype_out);
- }
- } else {
- flipped_container = container_range_of_ones(
- (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
- ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
- ctype_out);
- }
-}
-
-static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
- uint16_t lb_start, uint16_t lb_end) {
- const int i = ra_get_index(x1_arr, hb);
- uint8_t ctype_in, ctype_out;
- container_t *flipped_container = NULL;
- if (i >= 0) {
- container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
- flipped_container = container_inot_range(
- container_to_flip, ctype_in, (uint32_t)lb_start,
- (uint32_t)(lb_end + 1), &ctype_out);
- // if a new container was created, the old one was already freed
- if (container_get_cardinality(flipped_container, ctype_out)) {
- ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
- } else {
- container_free(flipped_container, ctype_out);
- ra_remove_at_index(x1_arr, i);
- }
-
- } else {
- flipped_container = container_range_of_ones(
- (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
- ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
- ctype_out);
- }
-}
-
-static void insert_fully_flipped_container(roaring_array_t *ans_arr,
- const roaring_array_t *x1_arr,
- uint16_t hb) {
- const int i = ra_get_index(x1_arr, hb);
- const int j = ra_get_index(ans_arr, hb);
- uint8_t ctype_in, ctype_out;
- container_t *flipped_container = NULL;
- if (i >= 0) {
- container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
- flipped_container =
- container_not(container_to_flip, ctype_in, &ctype_out);
- if (container_get_cardinality(flipped_container, ctype_out))
- ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
- ctype_out);
- else {
- container_free(flipped_container, ctype_out);
- }
- } else {
- flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
- ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
- ctype_out);
- }
-}
-
-static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
- const int i = ra_get_index(x1_arr, hb);
- uint8_t ctype_in, ctype_out;
- container_t *flipped_container = NULL;
- if (i >= 0) {
- container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
- flipped_container =
- container_inot(container_to_flip, ctype_in, &ctype_out);
-
- if (container_get_cardinality(flipped_container, ctype_out)) {
- ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
- } else {
- container_free(flipped_container, ctype_out);
- ra_remove_at_index(x1_arr, i);
- }
-
- } else {
- flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
- ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
- ctype_out);
- }
-}
-
-static roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
- uint64_t range_start,
- uint64_t range_end) {
- if (range_start >= range_end) {
- return roaring_bitmap_copy(x1);
- }
- if(range_end >= UINT64_C(0x100000000)) {
- range_end = UINT64_C(0x100000000);
- }
-
- roaring_bitmap_t *ans = roaring_bitmap_create();
- roaring_bitmap_set_copy_on_write(ans, is_cow(x1));
-
- uint16_t hb_start = (uint16_t)(range_start >> 16);
- const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
- uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
- const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
-
- ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
- hb_start, is_cow(x1));
- if (hb_start == hb_end) {
- insert_flipped_container(&ans->high_low_container,
- &x1->high_low_container, hb_start, lb_start,
- lb_end);
- } else {
- // start and end containers are distinct
- if (lb_start > 0) {
- // handle first (partial) container
- insert_flipped_container(&ans->high_low_container,
- &x1->high_low_container, hb_start,
- lb_start, 0xFFFF);
- ++hb_start; // for the full containers. Can't wrap.
- }
-
- if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
-
- uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) {
- insert_fully_flipped_container(&ans->high_low_container,
- &x1->high_low_container, hb);
- }
-
- // handle a partial final container
- if (lb_end != 0xFFFF) {
- insert_flipped_container(&ans->high_low_container,
- &x1->high_low_container, hb_end + 1, 0,
- lb_end);
- ++hb_end;
- }
- }
- ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
- hb_end, is_cow(x1));
- return ans;
-}
-
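A sketch of the half-open flip range handled above, built from roaring_bitmap_of_ptr() referenced earlier in this file; roaring_bitmap_free() is again assumed to exist elsewhere.

static roaring_bitmap_t *example_flip(void) {
    uint32_t vals[] = {11, 50};
    roaring_bitmap_t *src = roaring_bitmap_of_ptr(2, vals);
    /* [10, 13) toggles 10, 11 and 12 only: result is {10, 12, 50} */
    roaring_bitmap_t *flipped = roaring_bitmap_flip(src, 10, 13);
    roaring_bitmap_free(src);  /* assumed available; not part of this hunk */
    return flipped;
}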
-static void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
- uint64_t range_end) {
- if (range_start >= range_end) {
- return; // empty range
- }
- if(range_end >= UINT64_C(0x100000000)) {
- range_end = UINT64_C(0x100000000);
- }
-
- uint16_t hb_start = (uint16_t)(range_start >> 16);
- const uint16_t lb_start = (uint16_t)range_start;
- uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
- const uint16_t lb_end = (uint16_t)(range_end - 1);
-
- if (hb_start == hb_end) {
- inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
- lb_end);
- } else {
- // start and end containers are distinct
- if (lb_start > 0) {
- // handle first (partial) container
- inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
- 0xFFFF);
- ++hb_start; // for the full containers. Can't wrap.
- }
-
- if (lb_end != 0xFFFF) --hb_end;
-
- uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) {
- inplace_fully_flip_container(&x1->high_low_container, hb);
- }
- // handle a partial final container
- if (lb_end != 0xFFFF) {
- inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
- lb_end);
- ++hb_end;
- }
- }
-}
-
-static roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2,
- const bool bitsetconversion) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- if (0 == length1) {
- return roaring_bitmap_copy(x2);
- }
- if (0 == length2) {
- return roaring_bitmap_copy(x1);
- }
- roaring_bitmap_t *answer =
- roaring_bitmap_create_with_capacity(length1 + length2);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c;
- if (bitsetconversion &&
- (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
- (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)
- ){
- container_t *newc1 =
- container_mutable_unwrap_shared(c1, &type1);
- newc1 = container_to_bitset(newc1, type1);
- type1 = BITSET_CONTAINER_TYPE;
- c = container_lazy_ior(newc1, type1, c2, type2,
- &result_type);
- if (c != newc1) { // should not happen
- container_free(newc1, type1);
- }
- } else {
- c = container_lazy_or(c1, type1, c2, type2, &result_type);
- }
-            // since we assume that the initial containers are non-empty,
-            // the result here can only be non-empty
- ra_append(&answer->high_low_container, s1, c, result_type);
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- c1 = get_copy_of_container(c1, &type1, is_cow(x1));
- if (is_cow(x1)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c1,
- type1);
- }
- ra_append(&answer->high_low_container, s1, c1, type1);
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_append(&answer->high_low_container, s2, c2, type2);
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&answer->high_low_container,
- &x2->high_low_container, pos2, length2,
- is_cow(x2));
- } else if (pos2 == length2) {
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1,
- is_cow(x1));
- }
- return answer;
-}
-
-static void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2,
- const bool bitsetconversion) {
- uint8_t result_type = 0;
- int length1 = x1->high_low_container.size;
- const int length2 = x2->high_low_container.size;
-
- if (0 == length2) return;
-
- if (0 == length1) {
- roaring_bitmap_overwrite(x1, x2);
- return;
- }
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- if (!container_is_full(c1, type1)) {
- if ((bitsetconversion == false) ||
- (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)
- ){
- c1 = get_writable_copy_if_shared(c1, &type1);
- } else {
- // convert to bitset
- container_t *old_c1 = c1;
- uint8_t old_type1 = type1;
- c1 = container_mutable_unwrap_shared(c1, &type1);
- c1 = container_to_bitset(c1, type1);
- container_free(old_c1, old_type1);
- type1 = BITSET_CONTAINER_TYPE;
- }
-
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_lazy_ior(c1, type1, c2, type2,
- &result_type);
-
- if (c != c1) { // in this instance a new container was created,
- // and we need to free the old one
- container_free(c1, type1);
- }
-
- ra_set_container_at_index(&x1->high_low_container, pos1, c,
- result_type);
- }
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- // container_t *c2_clone = container_clone(c2, type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
- type2);
- pos1++;
- length1++;
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
- pos2, length2, is_cow(x2));
- }
-}
-
-static roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- uint8_t result_type = 0;
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- if (0 == length1) {
- return roaring_bitmap_copy(x2);
- }
- if (0 == length2) {
- return roaring_bitmap_copy(x1);
- }
- roaring_bitmap_t *answer =
- roaring_bitmap_create_with_capacity(length1 + length2);
- roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_lazy_xor(
- c1, type1, c2, type2, &result_type);
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_append(&answer->high_low_container, s1, c, result_type);
- } else {
- container_free(c, result_type);
- }
-
- ++pos1;
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- c1 = get_copy_of_container(c1, &type1, is_cow(x1));
- if (is_cow(x1)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c1,
- type1);
- }
- ra_append(&answer->high_low_container, s1, c1, type1);
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_append(&answer->high_low_container, s2, c2, type2);
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&answer->high_low_container,
- &x2->high_low_container, pos2, length2,
- is_cow(x2));
- } else if (pos2 == length2) {
- ra_append_copy_range(&answer->high_low_container,
- &x1->high_low_container, pos1, length1,
- is_cow(x1));
- }
- return answer;
-}
-
-static void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- assert(x1 != x2);
- uint8_t result_type = 0;
- int length1 = x1->high_low_container.size;
- const int length2 = x2->high_low_container.size;
-
- if (0 == length2) return;
-
- if (0 == length1) {
- roaring_bitmap_overwrite(x1, x2);
- return;
- }
- int pos1 = 0, pos2 = 0;
- uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- while (true) {
- if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
-
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
-
- container_t *c;
- if (type1 == SHARED_CONTAINER_TYPE) {
- c = container_lazy_xor(c1, type1, c2, type2, &result_type);
- shared_container_free(CAST_shared(c1)); // release
- }
- else {
- c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
- }
-
- if (container_nonzero_cardinality(c, result_type)) {
- ra_set_container_at_index(&x1->high_low_container, pos1, c,
- result_type);
- ++pos1;
- } else {
- container_free(c, result_type);
- ra_remove_at_index(&x1->high_low_container, pos1);
- --length1;
- }
- ++pos2;
- if (pos1 == length1) break;
- if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- } else if (s1 < s2) { // s1 < s2
- pos1++;
- if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
-
- } else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- // container_t *c2_clone = container_clone(c2, type2);
- c2 = get_copy_of_container(c2, &type2, is_cow(x2));
- if (is_cow(x2)) {
- ra_set_container_at_index(&x2->high_low_container, pos2, c2,
- type2);
- }
- ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
- type2);
- pos1++;
- length1++;
- pos2++;
- if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
- }
- }
- if (pos1 == length1) {
- ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
- pos2, length2, is_cow(x2));
- }
-}
-
-static void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
- roaring_array_t *ra = &r->high_low_container;
-
- int i = 0; for (i = 0; i < ra->size; ++i) {
- const uint8_t old_type = ra->typecodes[i];
- container_t *old_c = ra->containers[i];
- uint8_t new_type = old_type;
- container_t *new_c = container_repair_after_lazy(old_c, &new_type);
- ra->containers[i] = new_c;
- ra->typecodes[i] = new_type;
- }
-}
-
-
-
-/**
-* roaring_bitmap_rank returns the number of integers that are smaller than or
-* equal to x.
-*/
-static uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
- uint64_t size = 0;
- uint32_t xhigh = x >> 16;
- int i = 0; for (i = 0; i < bm->high_low_container.size; i++) {
- uint32_t key = bm->high_low_container.keys[i];
- if (xhigh > key) {
- size +=
- container_get_cardinality(bm->high_low_container.containers[i],
- bm->high_low_container.typecodes[i]);
- } else if (xhigh == key) {
- return size + container_rank(bm->high_low_container.containers[i],
- bm->high_low_container.typecodes[i],
- x & 0xFFFF);
- } else {
- return size;
- }
- }
- return size;
-}
-
-/**
-* roaring_bitmap_minimum returns the smallest value in the set.
-* Returns UINT32_MAX if the set is empty.
-*/
-static uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
- if (bm->high_low_container.size > 0) {
- container_t *c = bm->high_low_container.containers[0];
- uint8_t type = bm->high_low_container.typecodes[0];
- uint32_t key = bm->high_low_container.keys[0];
- uint32_t lowvalue = container_minimum(c, type);
- return lowvalue | (key << 16);
- }
- return UINT32_MAX;
-}
-
-/**
-* roaring_bitmap_maximum returns the greatest value in the set.
-* Returns 0 if the set is empty.
-*/
-static uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
- if (bm->high_low_container.size > 0) {
- container_t *container =
- bm->high_low_container.containers[bm->high_low_container.size - 1];
- uint8_t typecode =
- bm->high_low_container.typecodes[bm->high_low_container.size - 1];
- uint32_t key =
- bm->high_low_container.keys[bm->high_low_container.size - 1];
- uint32_t lowvalue = container_maximum(container, typecode);
- return lowvalue | (key << 16);
- }
- return 0;
-}
-
-static bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
- uint32_t *element) {
- container_t *container;
- uint8_t typecode;
- uint16_t key;
- uint32_t start_rank = 0;
- int i = 0;
- bool valid = false;
- while (!valid && i < bm->high_low_container.size) {
- container = bm->high_low_container.containers[i];
- typecode = bm->high_low_container.typecodes[i];
- valid =
- container_select(container, typecode, &start_rank, rank, element);
- i++;
- }
-
- if (valid) {
- key = bm->high_low_container.keys[i - 1];
- *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed
- return true;
- } else
- return false;
-}
-
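A sketch relating roaring_bitmap_select() above to roaring_bitmap_rank() defined earlier: select() uses a 0-based index while rank() counts values less than or equal to x, so a successful select at index k yields an element of rank k + 1.

static bool example_rank_select(const roaring_bitmap_t *bm, uint32_t k) {
    uint32_t element = 0;
    if (!roaring_bitmap_select(bm, k, &element))
        return false;  /* the bitmap holds fewer than k + 1 values */
    return roaring_bitmap_rank(bm, element) == (uint64_t)k + 1;
}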
-static bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- uint64_t answer = 0;
- int pos1 = 0, pos2 = 0;
-
- while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
-
- if (s1 == s2) {
- uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- if (container_intersect(c1, type1, c2, type2))
- return true;
- ++pos1;
- ++pos2;
- } else if (s1 < s2) { // s1 < s2
- pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
- } else { // s1 > s2
- pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
- }
- }
- return answer != 0;
-}
-
-static bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
- uint64_t x, uint64_t y) {
- if (x >= y) {
- // Empty range.
- return false;
- }
- roaring_uint32_iterator_t it;
- roaring_init_iterator(bm, &it);
- if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) {
- // No values above x.
- return false;
- }
- if (it.current_value >= y) {
- // No values below y.
- return false;
- }
- return true;
-}
-
-
-static uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const int length1 = x1->high_low_container.size,
- length2 = x2->high_low_container.size;
- uint64_t answer = 0;
- int pos1 = 0, pos2 = 0;
-
- while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
-
- if (s1 == s2) {
- uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- answer += container_and_cardinality(c1, type1, c2, type2);
- ++pos1;
- ++pos2;
- } else if (s1 < s2) { // s1 < s2
- pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
- } else { // s1 > s2
- pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
- }
- }
- return answer;
-}
-
-static double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
- const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
- const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
- return (double)inter / (double)(c1 + c2 - inter);
-}
-
-static uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
- const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
- const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
- return c1 + c2 - inter;
-}
-
-static uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
- const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
- return c1 - inter;
-}
-
-static uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
- const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
- const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
- const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
- return c1 + c2 - 2 * inter;
-}
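A quick numeric check of the cardinality identities above (values purely illustrative): with |x1| = 4, |x2| = 6 and an intersection of cardinality 2, the Jaccard index is 2 / (4 + 6 - 2) = 0.25, the union cardinality is 4 + 6 - 2 = 8, and the xor cardinality is 4 + 6 - 2*2 = 6.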
-
-
-static bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
- const uint16_t hb = val >> 16;
- /*
- * the next function call involves a binary search and lots of branching.
- */
- int32_t i = ra_get_index(&r->high_low_container, hb);
- if (i < 0) return false;
-
- uint8_t typecode;
- // next call ought to be cheap
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
- // rest might be a tad expensive, possibly involving another round of binary search
- return container_contains(container, val & 0xFFFF, typecode);
-}
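A minimal sketch of the key/low split performed by the lookup above (helper name and value are illustrative only):

#include <stdint.h>

/* split a 32-bit value the same way roaring_bitmap_contains() routes it */
static void split_value(uint32_t val, uint16_t *key, uint16_t *low) {
    *key = (uint16_t)(val >> 16);    /* upper 16 bits pick the container */
    *low = (uint16_t)(val & 0xFFFF); /* lower 16 bits are checked inside it */
}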
-
-
-/**
- * Check whether a range of values from range_start (included) to range_end (excluded) is present
- */
-static bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
- if(range_end >= UINT64_C(0x100000000)) {
- range_end = UINT64_C(0x100000000);
- }
-    if (range_start >= range_end) return true; // empty ranges are always contained!
- if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
- uint16_t hb_rs = (uint16_t)(range_start >> 16);
- uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
- const int32_t span = hb_re - hb_rs;
- const int32_t hlc_sz = ra_get_size(&r->high_low_container);
- if (hlc_sz < span + 1) {
- return false;
- }
- int32_t is = ra_get_index(&r->high_low_container, hb_rs);
- int32_t ie = ra_get_index(&r->high_low_container, hb_re);
- ie = (ie < 0 ? -ie - 1 : ie);
- if ((is < 0) || ((ie - is) != span)) {
- return false;
- }
- const uint32_t lb_rs = range_start & 0xFFFF;
- const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
- uint8_t type;
- container_t *c = ra_get_container_at_index(&r->high_low_container, is,
- &type);
- if (hb_rs == hb_re) {
- return container_contains_range(c, lb_rs, lb_re, type);
- }
- if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
- return false;
- }
- assert(ie < hlc_sz); // would indicate an algorithmic bug
- c = ra_get_container_at_index(&r->high_low_container, ie, &type);
- if (!container_contains_range(c, 0, lb_re, type)) {
- return false;
- }
- int32_t i; for (i = is + 1; i < ie; ++i) {
- c = ra_get_container_at_index(&r->high_low_container, i, &type);
- if (!container_is_full(c, type) ) {
- return false;
- }
- }
- return true;
-}
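A hedged usage sketch of the half-open range semantics implemented above (rb stands for a bitmap built elsewhere):

bool a = roaring_bitmap_contains_range(rb, 10, 20); /* asks about values 10..19 */
bool b = roaring_bitmap_contains_range(rb, 20, 20); /* empty range, always true */
bool c = roaring_bitmap_contains_range(rb, 5, 6);   /* single value, same as roaring_bitmap_contains(rb, 5) */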
-
-
-static bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
- const roaring_bitmap_t *r2) {
- return (roaring_bitmap_get_cardinality(r2) >
- roaring_bitmap_get_cardinality(r1) &&
- roaring_bitmap_is_subset(r1, r2));
-}
-
-
-/*
- * FROZEN SERIALIZATION FORMAT DESCRIPTION
- *
- * -- (beginning must be aligned by 32 bytes) --
- * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
- * <run_data> rle16_t[total number of rle elements in all run containers]
- * <array_data> uint16_t[total number of array elements in all array containers]
- * <keys> uint16_t[num_containers]
- * <counts> uint16_t[num_containers]
- * <typecodes> uint8_t[num_containers]
- * <header> uint32_t
- *
- * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
- * and the number of containers (17 bits).
- *
- * <counts> stores number of elements for every container.
- * Its meaning depends on container type.
- * For array and bitset containers, this value is the container cardinality minus one.
- * For run container, it is the number of rle_t elements (n_runs).
- *
- * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
- * all containers of respective type.
- *
- * <*_data> and <keys> are kept close together because they are not accessed
- * during deserialization. This may reduce IO in case of large mmapped bitmaps.
- * All members have their native alignments during deserialization except <header>,
- * which is not guaranteed to be aligned by 4 bytes.
- */
-
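A minimal sketch of how the 4-byte <header> described above packs the cookie and the container count; the helper names are illustrative, but the shifts mirror the serializer and the view below:

#include <stdint.h>

static uint32_t frozen_pack_header(uint32_t num_containers, uint32_t cookie) {
    return (num_containers << 15) | (cookie & 0x7FFF); /* 17-bit count above a 15-bit cookie */
}

static void frozen_unpack_header(uint32_t header, uint32_t *cookie, uint32_t *num_containers) {
    *cookie = header & 0x7FFF;
    *num_containers = header >> 15;
}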
-static size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
- const roaring_array_t *ra = &rb->high_low_container;
- size_t num_bytes = 0;
- int32_t i; for (i = 0; i < ra->size; i++) {
- switch (ra->typecodes[i]) {
- case BITSET_CONTAINER_TYPE: {
- num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
- break;
- }
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(ra->containers[i]);
- num_bytes += rc->n_runs * sizeof(rle16_t);
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
- num_bytes += ac->cardinality * sizeof(uint16_t);
- break;
- }
- default:
- __builtin_unreachable();
- }
- }
- num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
- num_bytes += 4; // header
- return num_bytes;
-}
-
-static inline void *arena_alloc(char **arena, size_t num_bytes) {
- char *res = *arena;
- *arena += num_bytes;
- return res;
-}
-
-static void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
- /*
- * Note: we do not require user to supply a specifically aligned buffer.
- * Thus we have to use memcpy() everywhere.
- */
-
- const roaring_array_t *ra = &rb->high_low_container;
-
- size_t bitset_zone_size = 0;
- size_t run_zone_size = 0;
- size_t array_zone_size = 0;
- int32_t i; for (i = 0; i < ra->size; i++) {
- switch (ra->typecodes[i]) {
- case BITSET_CONTAINER_TYPE: {
- bitset_zone_size +=
- BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
- break;
- }
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(ra->containers[i]);
- run_zone_size += rc->n_runs * sizeof(rle16_t);
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
- array_zone_size += ac->cardinality * sizeof(uint16_t);
- break;
- }
- default:
- __builtin_unreachable();
- }
- }
-
- uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
- rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
- uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
- uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
- uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
- uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
- uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
-
- for (i = 0; i < ra->size; i++) {
- uint16_t count;
- switch (ra->typecodes[i]) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc =
- const_CAST_bitset(ra->containers[i]);
- memcpy(bitset_zone, bc->words,
- BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
- bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
- if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
- count = bc->cardinality - 1;
- } else {
- count = bitset_container_compute_cardinality(bc) - 1;
- }
- break;
- }
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(ra->containers[i]);
- size_t num_bytes = rc->n_runs * sizeof(rle16_t);
- memcpy(run_zone, rc->runs, num_bytes);
- run_zone += rc->n_runs;
- count = rc->n_runs;
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
- size_t num_bytes = ac->cardinality * sizeof(uint16_t);
- memcpy(array_zone, ac->array, num_bytes);
- array_zone += ac->cardinality;
- count = ac->cardinality - 1;
- break;
- }
- default:
- __builtin_unreachable();
- }
- memcpy(&count_zone[i], &count, 2);
- }
- memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
- memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
- uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
- memcpy(header_zone, &header, 4);
-}
-
-static const roaring_bitmap_t *
-roaring_bitmap_frozen_view(const char *buf, size_t length) {
- if ((uintptr_t)buf % 32 != 0) {
- return NULL;
- }
-
- // cookie and num_containers
- if (length < 4) {
- return NULL;
- }
- uint32_t header;
- memcpy(&header, buf + length - 4, 4); // header may be misaligned
- if ((header & 0x7FFF) != FROZEN_COOKIE) {
- return NULL;
- }
- int32_t num_containers = (header >> 15);
-
- // typecodes, counts and keys
- if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
- return NULL;
- }
- uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
- uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
- uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
-
- // {bitset,array,run}_zone
- int32_t num_bitset_containers = 0;
- int32_t num_run_containers = 0;
- int32_t num_array_containers = 0;
- size_t bitset_zone_size = 0;
- size_t run_zone_size = 0;
- size_t array_zone_size = 0;
- int32_t i; for (i = 0; i < num_containers; i++) {
- switch (typecodes[i]) {
- case BITSET_CONTAINER_TYPE:
- num_bitset_containers++;
- bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
- break;
- case RUN_CONTAINER_TYPE:
- num_run_containers++;
- run_zone_size += counts[i] * sizeof(rle16_t);
- break;
- case ARRAY_CONTAINER_TYPE:
- num_array_containers++;
- array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
- break;
- default:
- return NULL;
- }
- }
- if (length != bitset_zone_size + run_zone_size + array_zone_size +
- 5 * num_containers + 4) {
- return NULL;
- }
- uint64_t *bitset_zone = (uint64_t*) (buf);
- rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
- uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
-
- size_t alloc_size = 0;
- alloc_size += sizeof(roaring_bitmap_t);
- alloc_size += num_containers * sizeof(container_t*);
- alloc_size += num_bitset_containers * sizeof(bitset_container_t);
- alloc_size += num_run_containers * sizeof(run_container_t);
- alloc_size += num_array_containers * sizeof(array_container_t);
-
- char *arena = (char *)ndpi_malloc(alloc_size);
- if (arena == NULL) {
- return NULL;
- }
-
- roaring_bitmap_t *rb = (roaring_bitmap_t *)
- arena_alloc(&arena, sizeof(roaring_bitmap_t));
- rb->high_low_container.flags = ROARING_FLAG_FROZEN;
- rb->high_low_container.allocation_size = num_containers;
- rb->high_low_container.size = num_containers;
- rb->high_low_container.keys = (uint16_t *)keys;
- rb->high_low_container.typecodes = (uint8_t *)typecodes;
- rb->high_low_container.containers =
- (container_t **)arena_alloc(&arena,
- sizeof(container_t*) * num_containers);
- for (i = 0; i < num_containers; i++) {
- switch (typecodes[i]) {
- case BITSET_CONTAINER_TYPE: {
- bitset_container_t *bitset = (bitset_container_t *)
- arena_alloc(&arena, sizeof(bitset_container_t));
- bitset->words = bitset_zone;
- bitset->cardinality = counts[i] + UINT32_C(1);
- rb->high_low_container.containers[i] = bitset;
- bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
- break;
- }
- case RUN_CONTAINER_TYPE: {
- run_container_t *run = (run_container_t *)
- arena_alloc(&arena, sizeof(run_container_t));
- run->capacity = counts[i];
- run->n_runs = counts[i];
- run->runs = run_zone;
- rb->high_low_container.containers[i] = run;
- run_zone += run->n_runs;
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- array_container_t *array = (array_container_t *)
- arena_alloc(&arena, sizeof(array_container_t));
- array->capacity = counts[i] + UINT32_C(1);
- array->cardinality = counts[i] + UINT32_C(1);
- array->array = array_zone;
- rb->high_low_container.containers[i] = array;
- array_zone += counts[i] + UINT32_C(1);
- break;
- }
- default:
- ndpi_free(arena);
- return NULL;
- }
- }
-
- return rb;
-}
-
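A usage sketch for the frozen path, assuming a bitmap rb built elsewhere; posix_memalign is used here only to satisfy the 32-byte alignment that the view checks for:

size_t len = roaring_bitmap_frozen_size_in_bytes(rb);
void *buf = NULL;
if (posix_memalign(&buf, 32, len) == 0) {
    roaring_bitmap_frozen_serialize(rb, (char *)buf);
    const roaring_bitmap_t *view = roaring_bitmap_frozen_view((const char *)buf, len);
    /* view is NULL if the buffer is misaligned or the contents look malformed */
}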
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring {
-#endif
-/* end file src/roaring.c */
/* begin file src/array_util.c */
#include <assert.h>
#include <stdbool.h>
@@ -11624,7 +7401,7 @@ static const uint8_t shuffle_mask16[] = {
* Optimized by D. Lemire on May 3rd 2013
*/
CROARING_TARGET_AVX2
-static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
+int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
const uint16_t *__restrict__ B, size_t s_b,
uint16_t *C) {
size_t count = 0;
@@ -11641,7 +7418,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
v_b, vectorlength, v_a, vectorlength,
_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
const int r = _mm_extract_epi32(res_v, 0);
- __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r);
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
count += _mm_popcnt_u32(r);
@@ -11665,7 +7442,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
const int r = _mm_extract_epi32(res_v, 0);
__m128i sm16 =
- _mm_load_si128((const __m128i *)shuffle_mask16 + r);
+ _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
count += _mm_popcnt_u32(r);
@@ -11703,7 +7480,7 @@ static int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
CROARING_UNTARGET_REGION
CROARING_TARGET_AVX2
-static int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
+int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
size_t s_a,
const uint16_t *__restrict__ B,
size_t s_b) {
@@ -11779,7 +7556,7 @@ CROARING_TARGET_AVX2
// Warning:
// This function may not be safe if A == C or B == C.
/////////
-static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
+int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
const uint16_t *__restrict__ B, size_t s_b,
uint16_t *C) {
// we handle the degenerate case
@@ -11842,7 +7619,7 @@ static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
const int bitmask_belongs_to_difference =
_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
/*** next few lines are probably expensive *****/
- __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
bitmask_belongs_to_difference);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
@@ -11877,7 +7654,7 @@ static int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
_mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
const int bitmask_belongs_to_difference =
_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
- __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
bitmask_belongs_to_difference);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
@@ -11983,7 +7760,7 @@ static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
* and binarySearch2. This approach can be slightly superior to a conventional
* galloping search in some instances.
*/
-static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s,
+int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
const uint16_t *large, size_t size_l,
uint16_t *buffer) {
size_t pos = 0, idx_l = 0, idx_s = 0;
@@ -11993,10 +7770,10 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s,
}
int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
- uint16_t target1 = small_set[idx_s];
- uint16_t target2 = small_set[idx_s + 1];
- uint16_t target3 = small_set[idx_s + 2];
- uint16_t target4 = small_set[idx_s + 3];
+ uint16_t target1 = small[idx_s];
+ uint16_t target2 = small[idx_s + 1];
+ uint16_t target3 = small[idx_s + 2];
+ uint16_t target4 = small[idx_s + 3];
binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3,
target4, &index1, &index2, &index3, &index4);
if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
@@ -12015,8 +7792,8 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s,
idx_l += index4;
}
if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
- uint16_t target1 = small_set[idx_s];
- uint16_t target2 = small_set[idx_s + 1];
+ uint16_t target1 = small[idx_s];
+ uint16_t target2 = small[idx_s + 1];
binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1,
&index2);
if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
@@ -12029,7 +7806,7 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s,
idx_l += index2;
}
if ((idx_s < size_s) && (idx_l < size_l)) {
- uint16_t val_s = small_set[idx_s];
+ uint16_t val_s = small[idx_s];
int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
if (index >= 0)
buffer[pos++] = val_s;
@@ -12040,7 +7817,7 @@ static int32_t intersect_skewed_uint16(const uint16_t *small_set, size_t size_s,
// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
-static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set,
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
size_t size_s,
const uint16_t *large,
size_t size_l) {
@@ -12050,7 +7827,7 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set,
return 0;
}
- uint16_t val_l = large[idx_l], val_s = small_set[idx_s];
+ uint16_t val_l = large[idx_l], val_s = small[idx_s];
while (true) {
if (val_l < val_s) {
@@ -12060,12 +7837,12 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set,
} else if (val_s < val_l) {
idx_s++;
if (idx_s == size_s) break;
- val_s = small_set[idx_s];
+ val_s = small[idx_s];
} else {
pos++;
idx_s++;
if (idx_s == size_s) break;
- val_s = small_set[idx_s];
+ val_s = small[idx_s];
idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
if (idx_l == size_l) break;
val_l = large[idx_l];
@@ -12075,7 +7852,7 @@ static int32_t intersect_skewed_uint16_cardinality(const uint16_t *small_set,
return (int32_t)pos;
}
-bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s,
+bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
const uint16_t *large, size_t size_l) {
size_t idx_l = 0, idx_s = 0;
@@ -12083,7 +7860,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s,
return false;
}
- uint16_t val_l = large[idx_l], val_s = small_set[idx_s];
+ uint16_t val_l = large[idx_l], val_s = small[idx_s];
while (true) {
if (val_l < val_s) {
@@ -12093,7 +7870,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s,
} else if (val_s < val_l) {
idx_s++;
if (idx_s == size_s) break;
- val_s = small_set[idx_s];
+ val_s = small[idx_s];
} else {
return true;
}
@@ -12105,7 +7882,7 @@ bool intersect_skewed_uint16_nonempty(const uint16_t *small_set, size_t size_s,
/**
* Generic intersection function.
*/
-static int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
+int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB, uint16_t *out) {
const uint16_t *initout = out;
if (lenA == 0 || lenB == 0) return 0;
@@ -12130,7 +7907,7 @@ static int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
return (int32_t)(out - initout); // NOTREACHED
}
-static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
+int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB) {
int32_t answer = 0;
if (lenA == 0 || lenB == 0) return 0;
@@ -12156,7 +7933,7 @@ static int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA
}
-static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
+bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
const uint16_t *B, const size_t lenB) {
if (lenA == 0 || lenB == 0) return 0;
const uint16_t *endA = A + lenA;
@@ -12184,7 +7961,7 @@ static bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
/**
* Generic intersection function.
*/
-static size_t intersection_uint32(const uint32_t *A, const size_t lenA,
+size_t intersection_uint32(const uint32_t *A, const size_t lenA,
const uint32_t *B, const size_t lenB,
uint32_t *out) {
const uint32_t *initout = out;
@@ -12210,7 +7987,7 @@ static size_t intersection_uint32(const uint32_t *A, const size_t lenA,
return (out - initout); // NOTREACHED
}
-static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
+size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
const uint32_t *B, const size_t lenB) {
if (lenA == 0 || lenB == 0) return 0;
size_t card = 0;
@@ -12238,7 +8015,7 @@ static size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
// can one vectorize the computation of the union? (Update: Yes! See
// union_vector16).
-static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
size_t size_2, uint16_t *buffer) {
size_t pos = 0, idx_1 = 0, idx_2 = 0;
@@ -12287,7 +8064,7 @@ static size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t
return pos;
}
-static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
+int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
int length2, uint16_t *a_out) {
int out_card = 0;
int k1 = 0, k2 = 0;
@@ -12332,7 +8109,7 @@ static int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2
return out_card;
}
-static int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
+int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
const uint16_t *array_2, int32_t card_2, uint16_t *out) {
int32_t pos1 = 0, pos2 = 0, pos_out = 0;
while (pos1 < card_1 && pos2 < card_2) {
@@ -12770,7 +8547,7 @@ CROARING_UNTARGET_REGION
// could be avoided?
static inline uint32_t unique(uint16_t *out, uint32_t len) {
uint32_t pos = 1;
- uint32_t i; for (i = 1; i < len; ++i) {
+ for (uint32_t i = 1; i < len; ++i) {
if (out[i] != out[i - 1]) {
out[pos++] = out[i];
}
@@ -12786,7 +8563,7 @@ static int uint16_compare(const void *a, const void *b) {
CROARING_TARGET_AVX2
// a one-pass SSE union algorithm
// This function may not be safe if array1 == output or array2 == output.
-static uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
const uint16_t *__restrict__ array2, uint32_t length2,
uint16_t *__restrict__ output) {
if ((length1 < 8) || (length2 < 8)) {
@@ -12900,7 +8677,7 @@ CROARING_UNTARGET_REGION
// could be avoided? Warning: assumes len > 0
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
uint32_t pos = 1;
- uint32_t i; for (i = 1; i < len; ++i) {
+ for (uint32_t i = 1; i < len; ++i) {
if (out[i] != out[i - 1]) {
out[pos++] = out[i];
} else
@@ -12910,7 +8687,7 @@ static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
}
CROARING_TARGET_AVX2
// a one-pass SSE xor algorithm
-static uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
const uint16_t *__restrict__ array2, uint32_t length2,
uint16_t *__restrict__ output) {
if ((length1 < 8) || (length2 < 8)) {
@@ -13019,7 +8796,7 @@ CROARING_UNTARGET_REGION
#endif // CROARING_IS_X64
-static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
+size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
size_t size_2, uint32_t *buffer) {
size_t pos = 0, idx_1 = 0, idx_2 = 0;
@@ -13068,7 +8845,7 @@ static size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t
return pos;
}
-static size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
+size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
const uint32_t *set_2, size_t size_2) {
size_t pos = 0, idx_1 = 0, idx_2 = 0;
@@ -13114,7 +8891,7 @@ static size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
-static size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
size_t size_2, uint16_t *buffer) {
#ifdef CROARING_IS_X64
if( croaring_avx2() ) {
@@ -13168,8 +8945,9 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
}
while (ptr1 < end8) {
- uint64_t v1 = *((const uint64_t*)ptr1);
- uint64_t v2 = *((const uint64_t*)ptr2);
+ uint64_t v1, v2;
+ memcpy(&v1,ptr1,sizeof(uint64_t));
+ memcpy(&v2,ptr2,sizeof(uint64_t));
if (v1 != v2) {
return false;
}
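A sketch of the pattern this hunk switches to: a memcpy-based word load is defined for any alignment and compilers lower it to a single move (helper name is illustrative):

#include <stdint.h>
#include <string.h>

static inline uint64_t load_u64(const void *p) {
    uint64_t v;
    memcpy(&v, p, sizeof v); /* no alignment requirement, unlike *(const uint64_t *)p */
    return v;
}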
@@ -13190,7 +8968,7 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
CROARING_UNTARGET_REGION
#endif
-static bool memequals(const void *s1, const void *s2, size_t n) {
+bool memequals(const void *s1, const void *s2, size_t n) {
if (n == 0) {
return true;
}
@@ -13209,6 +8987,1017 @@ static bool memequals(const void *s1, const void *s2, size_t n) {
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/array_util.c */
+/* begin file src/bitset_util.c */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+#ifdef CROARING_IS_X64
+static uint8_t lengthTable[256] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+#endif
+
+#ifdef CROARING_IS_X64
+ALIGNED(32)
+static uint32_t vecDecodeTable[256][8] = {
+ {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
+ {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
+ {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
+ {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
+ {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
+ {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
+ {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
+ {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
+ {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
+ {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
+ {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
+ {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
+ {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
+ {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
+ {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
+ {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
+ {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
+ {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
+ {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
+ {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
+ {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
+ {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
+ {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
+ {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
+ {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
+ {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
+ {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
+ {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
+ {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
+ {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
+ {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
+ {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
+ {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
+ {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
+ {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
+ {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
+ {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
+ {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
+ {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
+ {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
+ {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
+ {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
+ {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
+ {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
+ {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
+ {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
+ {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
+ {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
+ {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
+ {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
+ {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
+ {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
+ {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
+ {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
+ {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
+ {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
+ {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
+ {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
+ {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
+ {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
+ {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
+ {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
+ {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
+ {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
+ {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
+ {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
+ {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
+ {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
+ {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
+ {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
+ {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
+ {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
+ {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
+ {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
+ {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
+ {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
+ {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
+ {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
+ {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
+ {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
+ {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
+ {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
+ {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
+ {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
+ {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
+ {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
+ {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
+ {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
+ {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
+ {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
+ {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
+ {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
+ {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
+ {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
+ {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
+ {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
+ {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
+ {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
+ {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
+ {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
+ {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
+ {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
+ {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
+ {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
+ {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
+ {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
+ {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
+ {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
+ {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
+ {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
+ {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
+ {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
+ {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
+ {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
+ {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
+ {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
+ {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
+ {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
+ {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
+ {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
+ {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
+ {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
+ {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
+ {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
+ {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
+ {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
+ {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
+ {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
+ {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
+ {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
+ {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
+ {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
+ {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
+ {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
+ {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
+ {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
+ {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
+ {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
+ {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
+ {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
+ {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
+ {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
+ {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
+ {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
+ {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
+ {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
+ {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
+ {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
+ {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
+ {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
+ {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
+ {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
+ {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
+ {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
+ {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
+ {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
+ {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
+ {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
+ {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
+ {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
+ {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
+ {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
+ {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
+ {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
+ {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
+ {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
+ {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
+ {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
+ {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
+ {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
+ {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
+ {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
+ {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
+ {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
+ {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
+ {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
+ {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
+ {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
+ {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
+ {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
+ {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
+ {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
+ {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
+ {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
+ {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
+ {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
+ {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
+ {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
+ {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
+ {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
+ {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
+ {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
+ {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
+ {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
+ {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
+ {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
+ {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
+ {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
+ {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
+ {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
+ {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
+ {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
+ {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
+ {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
+ {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
+ {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
+ {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
+ {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
+ {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
+ {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
+ {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
+ {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
+ {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
+ {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
+ {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
+ {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
+ {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
+ {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
+ {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
+ {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
+ {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
+ {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
+ {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
+ {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
+ {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
+ {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
+ {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
+ {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
+ {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
+ {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
+ {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
+ {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
+ {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
+ {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
+ {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
+ {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
+ {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
+ {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
+ {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
+ {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
+ {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
+ {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
+ {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
+ {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
+ {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
+ {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
+ {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
+ {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
+ {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
+ {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
+ {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
+ {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
+ {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
+ {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
+ {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
+ {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
+};
+
+#endif // #ifdef CROARING_IS_X64
+
+#ifdef CROARING_IS_X64
+// same as vecDecodeTable but in 16 bits
+ALIGNED(32)
+static uint16_t vecDecodeTable_uint16[256][8] = {
+ {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
+ {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
+ {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
+ {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
+ {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
+ {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
+ {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
+ {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
+ {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
+ {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
+ {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
+ {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
+ {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
+ {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
+ {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
+ {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
+ {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
+ {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
+ {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
+ {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
+ {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
+ {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
+ {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
+ {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
+ {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
+ {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
+ {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
+ {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
+ {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
+ {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
+ {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
+ {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
+ {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
+ {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
+ {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
+ {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
+ {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
+ {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
+ {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
+ {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
+ {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
+ {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
+ {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
+ {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
+ {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
+ {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
+ {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
+ {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
+ {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
+ {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
+ {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
+ {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
+ {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
+ {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
+ {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
+ {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
+ {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
+ {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
+ {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
+ {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
+ {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
+ {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
+ {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
+ {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
+ {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
+ {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
+ {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
+ {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
+ {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
+ {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
+ {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
+ {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
+ {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
+ {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
+ {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
+ {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
+ {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
+ {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
+ {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
+ {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
+ {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
+ {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
+ {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
+ {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
+ {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
+ {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
+ {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
+ {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
+ {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
+ {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
+ {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
+ {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
+ {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
+ {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
+ {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
+ {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
+ {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
+ {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
+ {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
+ {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
+ {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
+ {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
+ {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
+ {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
+ {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
+ {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
+ {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
+ {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
+ {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
+ {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
+ {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
+ {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
+ {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
+ {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
+ {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
+ {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
+ {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
+ {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
+ {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
+ {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
+ {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
+ {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
+ {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
+ {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
+ {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
+ {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
+ {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
+ {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
+ {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
+ {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
+ {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
+ {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
+ {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
+ {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
+ {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
+ {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
+ {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
+ {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
+ {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
+ {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
+ {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
+ {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
+ {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
+ {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
+ {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
+ {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
+ {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
+ {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
+ {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
+ {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
+ {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
+ {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
+ {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
+ {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
+ {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
+ {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
+ {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
+ {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
+ {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
+ {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
+ {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
+ {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
+ {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
+ {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
+ {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
+ {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
+ {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
+ {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
+ {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
+ {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
+ {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
+ {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
+ {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
+ {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
+ {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
+ {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
+ {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
+ {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
+ {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
+ {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
+ {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
+ {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
+ {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
+ {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
+ {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
+ {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
+ {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
+ {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
+ {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
+ {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
+ {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
+ {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
+ {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
+ {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
+ {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
+ {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
+ {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
+ {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
+ {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
+ {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
+ {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
+ {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
+ {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
+ {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
+ {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
+ {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
+ {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
+ {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
+ {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
+ {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
+ {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
+ {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
+ {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
+ {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
+ {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
+ {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
+ {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
+ {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
+ {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
+ {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
+ {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
+ {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
+ {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
+ {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
+ {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
+ {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
+ {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
+ {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
+ {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
+ {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
+ {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
+ {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
+ {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
+ {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
+ {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
+ {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
+ {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
+ {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
+ {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
+ {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
+ {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
+ {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
+ {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
+ {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
+ {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
+ {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
+ {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
+ {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
+ {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
+ {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
+ {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
+ {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
+ {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
+ {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
+ {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
+ {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
+};
+
+#endif
+
+#ifdef CROARING_IS_X64
+CROARING_TARGET_AVX2
+size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
+ uint32_t *out, size_t outcapacity,
+ uint32_t base) {
+ uint32_t *initout = out;
+ __m256i baseVec = _mm256_set1_epi32(base - 1);
+ __m256i incVec = _mm256_set1_epi32(64);
+ __m256i add8 = _mm256_set1_epi32(8);
+ uint32_t *safeout = out + outcapacity;
+ size_t i = 0;
+ for (; (i < length) && (out + 64 <= safeout); ++i) {
+ uint64_t w = words[i];
+ if (w == 0) {
+ baseVec = _mm256_add_epi32(baseVec, incVec);
+ } else {
+ for (int k = 0; k < 4; ++k) {
+ uint8_t byteA = (uint8_t)w;
+ uint8_t byteB = (uint8_t)(w >> 8);
+ w >>= 16;
+ __m256i vecA =
+ _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]);
+ __m256i vecB =
+ _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]);
+ uint8_t advanceA = lengthTable[byteA];
+ uint8_t advanceB = lengthTable[byteB];
+ vecA = _mm256_add_epi32(baseVec, vecA);
+ baseVec = _mm256_add_epi32(baseVec, add8);
+ vecB = _mm256_add_epi32(baseVec, vecB);
+ baseVec = _mm256_add_epi32(baseVec, add8);
+ _mm256_storeu_si256((__m256i *)out, vecA);
+ out += advanceA;
+ _mm256_storeu_si256((__m256i *)out, vecB);
+ out += advanceB;
+ }
+ }
+ }
+ base += i * 64;
+ for (; (i < length) && (out < safeout); ++i) {
+ uint64_t w = words[i];
+ while ((w != 0) && (out < safeout)) {
+ uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
+ int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+ uint32_t val = r + base;
+ memcpy(out, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ out++;
+ w ^= t;
+ }
+ base += 64;
+ }
+ return out - initout;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_IS_X64
+
+size_t bitset_extract_setbits(const uint64_t *words, size_t length,
+ uint32_t *out, uint32_t base) {
+ int outpos = 0;
+ for (size_t i = 0; i < length; ++i) {
+ uint64_t w = words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
+ int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+ uint32_t val = r + base;
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ w ^= t;
+ }
+ base += 64;
+ }
+ return outpos;
+}
+
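A small self-contained sketch of the lowest-set-bit loop used above, with an illustrative input word:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t w = 0x12;   /* bits 1 and 4 set */
    uint32_t base = 100;
    while (w != 0) {
        uint64_t t = w & (~w + 1);  /* isolate the lowest set bit (BLSI) */
        int r = __builtin_ctzll(w); /* its index (TZCNT) */
        printf("%u\n", r + base);   /* prints 101, then 104 */
        w ^= t;                     /* clear that bit and continue */
    }
    return 0;
}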
+size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
+ const uint64_t * __restrict__ words2,
+ size_t length, uint16_t *out,
+ uint16_t base) {
+ int outpos = 0;
+ for (size_t i = 0; i < length; ++i) {
+ uint64_t w = words1[i] & words2[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ out[outpos++] = r + base;
+ w ^= t;
+ }
+ base += 64;
+ }
+ return outpos;
+}
+
+#ifdef CROARING_IS_X64
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out" as 16-bit integers, values start at "base" (can
+ * be set to zero).
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ * set.
+ *
+ * Returns how many values were actually decoded.
+ *
+ * This function uses SSE decoding.
+ */
+CROARING_TARGET_AVX2
+size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
+ uint16_t *out, size_t outcapacity,
+ uint16_t base) {
+ uint16_t *initout = out;
+ __m128i baseVec = _mm_set1_epi16(base - 1);
+ __m128i incVec = _mm_set1_epi16(64);
+ __m128i add8 = _mm_set1_epi16(8);
+ uint16_t *safeout = out + outcapacity;
+ const int numberofbytes = 2; // process two bytes at a time
+ size_t i = 0;
+ for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) {
+ uint64_t w = words[i];
+ if (w == 0) {
+ baseVec = _mm_add_epi16(baseVec, incVec);
+ } else {
+ for (int k = 0; k < 4; ++k) {
+ uint8_t byteA = (uint8_t)w;
+ uint8_t byteB = (uint8_t)(w >> 8);
+ w >>= 16;
+ __m128i vecA = _mm_loadu_si128(
+ (const __m128i *)vecDecodeTable_uint16[byteA]);
+ __m128i vecB = _mm_loadu_si128(
+ (const __m128i *)vecDecodeTable_uint16[byteB]);
+ uint8_t advanceA = lengthTable[byteA];
+ uint8_t advanceB = lengthTable[byteB];
+ vecA = _mm_add_epi16(baseVec, vecA);
+ baseVec = _mm_add_epi16(baseVec, add8);
+ vecB = _mm_add_epi16(baseVec, vecB);
+ baseVec = _mm_add_epi16(baseVec, add8);
+ _mm_storeu_si128((__m128i *)out, vecA);
+ out += advanceA;
+ _mm_storeu_si128((__m128i *)out, vecB);
+ out += advanceB;
+ }
+ }
+ }
+ base += (uint16_t)(i * 64);
+ for (; (i < length) && (out < safeout); ++i) {
+ uint64_t w = words[i];
+ while ((w != 0) && (out < safeout)) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ *out = r + base;
+ out++;
+ w ^= t;
+ }
+ base += 64;
+ }
+ return out - initout;
+}
+CROARING_UNTARGET_REGION
+#endif
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the positions
+ * of all the set bits to "out", with values starting at "base" (which can
+ * be set to zero).
+ *
+ * The "out" pointer should reference a buffer large enough to hold the
+ * actual number of bits set.
+ *
+ * Returns how many values were actually decoded.
+ */
+size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
+ uint16_t *out, uint16_t base) {
+ int outpos = 0;
+ for (size_t i = 0; i < length; ++i) {
+ uint64_t w = words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ out[outpos++] = r + base;
+ w ^= t;
+ }
+ base += 64;
+ }
+ return outpos;
+}
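+/*
+ * Illustrative sketch (not part of the upstream CRoaring sources): the 16-bit
+ * variant advances "base" by 64 per word, and the caller must size "out" for
+ * the number of set bits. The helper name is hypothetical; block is disabled.
+ */
+#if 0
+static void example_extract_setbits_uint16(void) {
+    uint64_t words[2] = { UINT64_C(1) << 5, UINT64_C(1) << 1 }; /* bits 5 and 65 */
+    uint16_t out[2]; /* two bits are set, so two slots suffice */
+    size_t n = bitset_extract_setbits_uint16(words, 2, out, 100);
+    /* n == 2, out == {105, 165}: bit index plus the running base */
+    (void)n;
+}
+#endif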
+
+#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64)
+
+static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, pos;
+ uint64_t shift = 6;
+ const uint16_t *end = list + length;
+ if (!length) return card;
+ // TODO: could unroll for performance, see bitset_set_list
+ // bts is not available as an intrinsic in GCC
+ __asm volatile(
+ "1:\n"
+ "movzwq (%[list]), %[pos]\n"
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)\n"
+ "sbb $-1, %[card]\n"
+ "add $2, %[list]\n"
+ "cmp %[list], %[end]\n"
+ "jnz 1b"
+ : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
+ [pos] "=&r"(pos), [offset] "=&r"(offset)
+ : [end] "r"(end), [words] "r"(words), [shift] "r"(shift));
+ return card;
+}
+
+static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+ uint64_t pos;
+ const uint16_t *end = list + length;
+
+ uint64_t shift = 6;
+ uint64_t offset;
+ uint64_t load;
+ for (; list + 3 < end; list += 4) {
+ pos = list[0];
+ __asm volatile(
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)"
+ : [load] "=&r"(load), [offset] "=&r"(offset)
+ : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ pos = list[1];
+ __asm volatile(
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)"
+ : [load] "=&r"(load), [offset] "=&r"(offset)
+ : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ pos = list[2];
+ __asm volatile(
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)"
+ : [load] "=&r"(load), [offset] "=&r"(offset)
+ : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ pos = list[3];
+ __asm volatile(
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)"
+ : [load] "=&r"(load), [offset] "=&r"(offset)
+ : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ }
+
+ while (list != end) {
+ pos = list[0];
+ __asm volatile(
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "bts %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)"
+ : [load] "=&r"(load), [offset] "=&r"(offset)
+ : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ list++;
+ }
+}
+
+static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length) {
+ uint64_t offset, load, pos;
+ uint64_t shift = 6;
+ const uint16_t *end = list + length;
+ if (!length) return card;
+ // btr is not available as an intrinsic in GCC
+ __asm volatile(
+ "1:\n"
+ "movzwq (%[list]), %[pos]\n"
+ "shrx %[shift], %[pos], %[offset]\n"
+ "mov (%[words],%[offset],8), %[load]\n"
+ "btr %[pos], %[load]\n"
+ "mov %[load], (%[words],%[offset],8)\n"
+ "sbb $0, %[card]\n"
+ "add $2, %[list]\n"
+ "cmp %[list], %[end]\n"
+ "jnz 1b"
+ : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
+ [pos] "=&r"(pos), [offset] "=&r"(offset)
+ : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)
+ :
+ /* clobbers */ "memory");
+ return card;
+}
+
+static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *(const uint16_t *)list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load & ~(UINT64_C(1) << index);
+ card -= (load ^ newload) >> index;
+ words[offset] = newload;
+ list++;
+ }
+ return card;
+}
+
+static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load | (UINT64_C(1) << index);
+ card += (load ^ newload) >> index;
+ words[offset] = newload;
+ list++;
+ }
+ return card;
+}
+
+static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load | (UINT64_C(1) << index);
+ words[offset] = newload;
+ list++;
+ }
+}
+
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length) {
+ if( croaring_avx2() ) {
+ return _asm_bitset_clear_list(words, card, list, length);
+ } else {
+ return _scalar_bitset_clear_list(words, card, list, length);
+ }
+}
+
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length) {
+ if( croaring_avx2() ) {
+ return _asm_bitset_set_list_withcard(words, card, list, length);
+ } else {
+ return _scalar_bitset_set_list_withcard(words, card, list, length);
+ }
+}
+
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+ if( croaring_avx2() ) {
+ _asm_bitset_set_list(words, list, length);
+ } else {
+ _scalar_bitset_set_list(words, list, length);
+ }
+}
+#else
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *(const uint16_t *)list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load & ~(UINT64_C(1) << index);
+ card -= (load ^ newload) >> index;
+ words[offset] = newload;
+ list++;
+ }
+ return card;
+}
+
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load | (UINT64_C(1) << index);
+ card += (load ^ newload) >> index;
+ words[offset] = newload;
+ list++;
+ }
+ return card;
+}
+
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load | (UINT64_C(1) << index);
+ words[offset] = newload;
+ list++;
+ }
+}
+
+#endif
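+/*
+ * Illustrative sketch (not part of the upstream CRoaring sources): the
+ * "_withcard" setters only count bits that were previously clear, which is
+ * what the asm path expresses with "bts" followed by "sbb $-1". The helper
+ * name is hypothetical; the block is disabled.
+ */
+#if 0
+static void example_set_list_withcard(void) {
+    uint64_t words[1024] = {0}; /* one 2^16-bit container */
+    const uint16_t vals[3] = { 3, 3, 700 }; /* note the duplicate */
+    uint64_t card = bitset_set_list_withcard(words, 0, vals, 3);
+    /* card == 2: setting an already-set bit does not bump the count */
+    (void)card;
+}
+#endif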
+
+/* flip specified bits */
+/* TODO: consider whether worthwhile to make an asm version */
+
+uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load ^ (UINT64_C(1) << index);
+ // todo: is a branch here all that bad?
+ card +=
+ (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1
+ words[offset] = newload;
+ list++;
+ }
+ return card;
+}
+
+void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+ uint64_t offset, load, newload, pos, index;
+ const uint16_t *end = list + length;
+ while (list != end) {
+ pos = *list;
+ offset = pos >> 6;
+ index = pos % 64;
+ load = words[offset];
+ newload = load ^ (UINT64_C(1) << index);
+ words[offset] = newload;
+ list++;
+ }
+}
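+/*
+ * Illustrative sketch (not part of the upstream CRoaring sources): flipping,
+ * unlike setting, can also decrement the cardinality. Hypothetical helper,
+ * disabled block.
+ */
+#if 0
+static void example_flip_list_withcard(void) {
+    uint64_t words[1024] = {0};
+    const uint16_t vals[3] = { 3, 3, 700 };
+    uint64_t card = bitset_flip_list_withcard(words, 0, vals, 3);
+    /* card == 1: the first 3 sets its bit (+1), the second 3 clears it (-1),
+       and 700 sets its bit (+1), so only bit 700 remains set */
+    (void)card;
+}
+#endif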
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/bitset_util.c */
/* begin file src/containers/array.c */
/*
* array.c
@@ -13233,25 +10022,24 @@ extern inline bool array_container_contains(const array_container_t *arr,
uint16_t pos);
extern inline int array_container_cardinality(const array_container_t *array);
extern inline bool array_container_nonzero_cardinality(const array_container_t *array);
-extern inline void array_container_clear(array_container_t *array);
extern inline int32_t array_container_serialized_size_in_bytes(int32_t card);
extern inline bool array_container_empty(const array_container_t *array);
extern inline bool array_container_full(const array_container_t *array);
/* Create a new array with capacity size. Return NULL in case of failure. */
-static array_container_t *array_container_create_given_capacity(int32_t size) {
+array_container_t *array_container_create_given_capacity(int32_t size) {
array_container_t *container;
- if ((container = (array_container_t *)ndpi_malloc(sizeof(array_container_t))) ==
+ if ((container = (array_container_t *)roaring_malloc(sizeof(array_container_t))) ==
NULL) {
return NULL;
}
if( size <= 0 ) { // we don't want to rely on malloc(0)
container->array = NULL;
- } else if ((container->array = (uint16_t *)ndpi_malloc(sizeof(uint16_t) * size)) ==
+ } else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) * size)) ==
NULL) {
- ndpi_free(container);
+ roaring_free(container);
return NULL;
}
@@ -13262,23 +10050,23 @@ static array_container_t *array_container_create_given_capacity(int32_t size) {
}
/* Create a new array. Return NULL in case of failure. */
-static array_container_t *array_container_create() {
+array_container_t *array_container_create() {
return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
}
/* Create a new array containing all values in [min,max). */
-static array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
+array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
array_container_t * answer = array_container_create_given_capacity(max - min + 1);
if(answer == NULL) return answer;
answer->cardinality = 0;
- uint32_t k; for(k = min; k < max; k++) {
+ for(uint32_t k = min; k < max; k++) {
answer->array[answer->cardinality++] = k;
}
return answer;
}
/* Duplicate container */
-static array_container_t *array_container_clone(const array_container_t *src) {
+array_container_t *array_container_clone(const array_container_t *src) {
array_container_t *newcontainer =
array_container_create_given_capacity(src->capacity);
if (newcontainer == NULL) return NULL;
@@ -13291,30 +10079,56 @@ static array_container_t *array_container_clone(const array_container_t *src) {
return newcontainer;
}
-static int array_container_shrink_to_fit(array_container_t *src) {
+void array_container_offset(const array_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset) {
+ array_container_t *lo = NULL, *hi = NULL;
+ int top, lo_cap, hi_cap;
+
+ top = (1 << 16) - offset;
+
+ lo_cap = count_less(c->array, c->cardinality, top);
+ if (loc && lo_cap) {
+ lo = array_container_create_given_capacity(lo_cap);
+ for (int i = 0; i < lo_cap; ++i) {
+ array_container_add(lo, c->array[i] + offset);
+ }
+ *loc = (container_t*)lo;
+ }
+
+ hi_cap = c->cardinality - lo_cap;
+ if (hic && hi_cap) {
+ hi = array_container_create_given_capacity(hi_cap);
+ for (int i = lo_cap; i < c->cardinality; ++i) {
+ array_container_add(hi, c->array[i] + offset);
+ }
+ *hic = (container_t*)hi;
+ }
+}
+
+int array_container_shrink_to_fit(array_container_t *src) {
if (src->cardinality == src->capacity) return 0; // nothing to do
- int old_capacity = src->capacity;
int savings = src->capacity - src->cardinality;
src->capacity = src->cardinality;
if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs
- ndpi_free(src->array);
+ roaring_free(src->array);
src->array = NULL;
} else {
uint16_t *oldarray = src->array;
src->array =
- (uint16_t *)ndpi_realloc(oldarray, old_capacity * sizeof(uint16_t), src->capacity * sizeof(uint16_t));
- if (src->array == NULL) ndpi_free(oldarray); // should never happen?
+ (uint16_t *)roaring_realloc(oldarray, src->capacity * sizeof(uint16_t));
+ if (src->array == NULL) roaring_free(oldarray); // should never happen?
}
return savings;
}
/* Free memory. */
-static void array_container_free(array_container_t *arr) {
+void array_container_free(array_container_t *arr) {
if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
- ndpi_free(arr->array);
+ roaring_free(arr->array);
arr->array = NULL; // pedantic
}
- ndpi_free(arr);
+ roaring_free(arr);
}
static inline int32_t grow_capacity(int32_t capacity) {
@@ -13328,26 +10142,25 @@ static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
return ((val < min) ? min : (val > max) ? max : val);
}
-static void array_container_grow(array_container_t *container, int32_t min,
+void array_container_grow(array_container_t *container, int32_t min,
bool preserve) {
int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);
- int32_t old_capacity = container->capacity;
container->capacity = new_capacity;
uint16_t *array = container->array;
if (preserve) {
container->array =
- (uint16_t *)ndpi_realloc(array, old_capacity * sizeof(uint16_t), new_capacity * sizeof(uint16_t));
- if (container->array == NULL) ndpi_free(array);
+ (uint16_t *)roaring_realloc(array, new_capacity * sizeof(uint16_t));
+ if (container->array == NULL) roaring_free(array);
} else {
// Jon Strabala reports that some tools complain otherwise
if (array != NULL) {
- ndpi_free(array);
+ roaring_free(array);
}
- container->array = (uint16_t *)ndpi_malloc(new_capacity * sizeof(uint16_t));
+ container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
}
// handle the case where realloc fails
@@ -13358,7 +10171,7 @@ static void array_container_grow(array_container_t *container, int32_t min,
}
/* Copy one container into another. We assume that they are distinct. */
-static void array_container_copy(const array_container_t *src,
+void array_container_copy(const array_container_t *src,
array_container_t *dst) {
const int32_t cardinality = src->cardinality;
if (cardinality > dst->capacity) {
@@ -13369,9 +10182,9 @@ static void array_container_copy(const array_container_t *src,
memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
}
-static void array_container_add_from_range(array_container_t *arr, uint32_t min,
+void array_container_add_from_range(array_container_t *arr, uint32_t min,
uint32_t max, uint16_t step) {
- uint32_t value; for (value = min; value < max; value += step) {
+ for (uint32_t value = min; value < max; value += step) {
array_container_append(arr, value);
}
}
@@ -13379,7 +10192,7 @@ static void array_container_add_from_range(array_container_t *arr, uint32_t min,
/* Computes the union of array1 and array2 and write the result to arrayout.
* It is assumed that arrayout is distinct from both array1 and array2.
*/
-static void array_container_union(const array_container_t *array_1,
+void array_container_union(const array_container_t *array_1,
const array_container_t *array_2,
array_container_t *out) {
const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
@@ -13397,7 +10210,7 @@ static void array_container_union(const array_container_t *array_1,
* to array out.
* Array out does not need to be distinct from array_1
*/
-static void array_container_andnot(const array_container_t *array_1,
+void array_container_andnot(const array_container_t *array_1,
const array_container_t *array_2,
array_container_t *out) {
if (out->capacity < array_1->cardinality)
@@ -13424,7 +10237,7 @@ static void array_container_andnot(const array_container_t *array_1,
* to arrayout.
* It is assumed that arrayout is distinct from both array1 and array2.
*/
-static void array_container_xor(const array_container_t *array_1,
+void array_container_xor(const array_container_t *array_1,
const array_container_t *array_2,
array_container_t *out) {
const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
@@ -13458,7 +10271,7 @@ static inline int32_t minimum_int32(int32_t a, int32_t b) {
* arrayout.
* It is assumed that arrayout is distinct from both array1 and array2.
* */
-static void array_container_intersection(const array_container_t *array1,
+void array_container_intersection(const array_container_t *array1,
const array_container_t *array2,
array_container_t *out) {
int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
@@ -13499,7 +10312,7 @@ static void array_container_intersection(const array_container_t *array1,
/* computes the size of the intersection of array1 and array2
* */
-static int array_container_intersection_cardinality(const array_container_t *array1,
+int array_container_intersection_cardinality(const array_container_t *array1,
const array_container_t *array2) {
int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
const int threshold = 64; // subject to tuning
@@ -13525,7 +10338,7 @@ static int array_container_intersection_cardinality(const array_container_t *arr
}
}
-static bool array_container_intersect(const array_container_t *array1,
+bool array_container_intersect(const array_container_t *array1,
const array_container_t *array2) {
int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
const int threshold = 64; // subject to tuning
@@ -13545,7 +10358,7 @@ static bool array_container_intersect(const array_container_t *array1,
/* computes the intersection of array1 and array2 and write the result to
* array1.
* */
-static void array_container_intersection_inplace(array_container_t *src_1,
+void array_container_intersection_inplace(array_container_t *src_1,
const array_container_t *src_2) {
// todo: can any of this be vectorized?
int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
@@ -13562,11 +10375,12 @@ static void array_container_intersection_inplace(array_container_t *src_1,
}
}
-static int array_container_to_uint32_array(void *vout, const array_container_t *cont,
+ALLOW_UNALIGNED
+int array_container_to_uint32_array(void *vout, const array_container_t *cont,
uint32_t base) {
int outpos = 0;
uint32_t *out = (uint32_t *)vout;
- int i = 0; for (i = 0; i < cont->cardinality; ++i) {
+ for (int i = 0; i < cont->cardinality; ++i) {
const uint32_t val = base + cont->array[i];
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
@@ -13575,37 +10389,36 @@ static int array_container_to_uint32_array(void *vout, const array_container_t *
return outpos;
}
-static void array_container_printf(const array_container_t *v) {
+void array_container_printf(const array_container_t *v) {
if (v->cardinality == 0) {
printf("{}");
return;
}
printf("{");
printf("%d", v->array[0]);
- int i ; for (i = 1; i < v->cardinality; ++i) {
+ for (int i = 1; i < v->cardinality; ++i) {
printf(",%d", v->array[i]);
}
printf("}");
}
-static void array_container_printf_as_uint32_array(const array_container_t *v,
+void array_container_printf_as_uint32_array(const array_container_t *v,
uint32_t base) {
if (v->cardinality == 0) {
return;
}
printf("%u", v->array[0] + base);
- int i ; for (i = 1; i < v->cardinality; ++i) {
+ for (int i = 1; i < v->cardinality; ++i) {
printf(",%u", v->array[i] + base);
}
}
/* Compute the number of runs */
-static int32_t array_container_number_of_runs(const array_container_t *ac) {
+int32_t array_container_number_of_runs(const array_container_t *ac) {
// Can SIMD work here?
int32_t nr_runs = 0;
int32_t prev = -2;
- const uint16_t *p;
- for (p = ac->array; p != ac->array + ac->cardinality; ++p) {
+ for (const uint16_t *p = ac->array; p != ac->array + ac->cardinality; ++p) {
if (*p != prev + 1) nr_runs++;
prev = *p;
}
@@ -13618,12 +10431,12 @@ static int32_t array_container_number_of_runs(const array_container_t *ac) {
* array_container_size_in_bytes(container).
*
*/
-static int32_t array_container_write(const array_container_t *container, char *buf) {
+int32_t array_container_write(const array_container_t *container, char *buf) {
memcpy(buf, container->array, container->cardinality * sizeof(uint16_t));
return array_container_size_in_bytes(container);
}
-static bool array_container_is_subset(const array_container_t *container1,
+bool array_container_is_subset(const array_container_t *container1,
const array_container_t *container2) {
if (container1->cardinality > container2->cardinality) {
return false;
@@ -13646,7 +10459,7 @@ static bool array_container_is_subset(const array_container_t *container1,
}
}
-static int32_t array_container_read(int32_t cardinality, array_container_t *container,
+int32_t array_container_read(int32_t cardinality, array_container_t *container,
const char *buf) {
if (container->capacity < cardinality) {
array_container_grow(container, cardinality, false);
@@ -13657,17 +10470,17 @@ static int32_t array_container_read(int32_t cardinality, array_container_t *cont
return array_container_size_in_bytes(container);
}
-static bool array_container_iterate(const array_container_t *cont, uint32_t base,
+bool array_container_iterate(const array_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr) {
- int i = 0; for (i = 0; i < cont->cardinality; i++)
+ for (int i = 0; i < cont->cardinality; i++)
if (!iterator(cont->array[i] + base, ptr)) return false;
return true;
}
-static bool array_container_iterate64(const array_container_t *cont, uint32_t base,
+bool array_container_iterate64(const array_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr) {
- int i = 0; for (i = 0; i < cont->cardinality; i++)
+ for (int i = 0; i < cont->cardinality; i++)
if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr))
return false;
return true;
@@ -13677,13 +10490,17 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/array.c */
-/* begin file src/containers/mixed_union.c */
+/* begin file src/containers/bitset.c */
/*
- * mixed_union.c
+ * bitset.c
*
*/
-
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
@@ -13691,1474 +10508,945 @@ static bool array_container_iterate64(const array_container_t *cont, uint32_t ba
extern "C" { namespace roaring { namespace internal {
#endif
-/* Compute the union of src_1 and src_2 and write the result to
- * dst. */
-static void array_bitset_container_union(const array_container_t *src_1,
- const bitset_container_t *src_2,
- bitset_container_t *dst) {
- if (src_2 != dst) bitset_container_copy(src_2, dst);
- dst->cardinality = (int32_t)bitset_set_list_withcard(
- dst->words, dst->cardinality, src_1->array, src_1->cardinality);
+extern inline int bitset_container_cardinality(const bitset_container_t *bitset);
+extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
+// unused at this time:
+//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_get(const bitset_container_t *bitset,
+ uint16_t pos);
+extern inline int32_t bitset_container_serialized_size_in_bytes(void);
+extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_contains(const bitset_container_t *bitset,
+ uint16_t pos);
+
+void bitset_container_clear(bitset_container_t *bitset) {
+ memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ bitset->cardinality = 0;
}
-/* Compute the union of src_1 and src_2 and write the result to
- * dst. It is allowed for src_2 to be dst. This version does not
- * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
-static void array_bitset_container_lazy_union(const array_container_t *src_1,
- const bitset_container_t *src_2,
- bitset_container_t *dst) {
- if (src_2 != dst) bitset_container_copy(src_2, dst);
- bitset_set_list(dst->words, src_1->array, src_1->cardinality);
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+void bitset_container_set_all(bitset_container_t *bitset) {
+ memset(bitset->words, INT64_C(-1),
+ sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ bitset->cardinality = (1 << 16);
}
-static void run_bitset_container_union(const run_container_t *src_1,
- const bitset_container_t *src_2,
- bitset_container_t *dst) {
- assert(!run_container_is_full(src_1)); // catch this case upstream
- if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
- rle16_t rle = src_1->runs[rlepos];
- bitset_set_lenrange(dst->words, rle.value, rle.length);
+
+
+/* Create a new bitset. Return NULL in case of failure. */
+bitset_container_t *bitset_container_create(void) {
+ bitset_container_t *bitset =
+ (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
+
+ if (!bitset) {
+ return NULL;
}
- dst->cardinality = bitset_container_compute_cardinality(dst);
+ // sizeof(__m256i) == 32
+ bitset->words = (uint64_t *)roaring_aligned_malloc(
+ 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ if (!bitset->words) {
+ roaring_free(bitset);
+ return NULL;
+ }
+ bitset_container_clear(bitset);
+ return bitset;
}
-static void run_bitset_container_lazy_union(const run_container_t *src_1,
- const bitset_container_t *src_2,
- bitset_container_t *dst) {
- assert(!run_container_is_full(src_1)); // catch this case upstream
- if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
- rle16_t rle = src_1->runs[rlepos];
- bitset_set_lenrange(dst->words, rle.value, rle.length);
- }
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+/* Copy one container into another. We assume that they are distinct. */
+void bitset_container_copy(const bitset_container_t *source,
+ bitset_container_t *dest) {
+ dest->cardinality = source->cardinality;
+ memcpy(dest->words, source->words,
+ sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
}
-// why do we leave the result as a run container??
-static void array_run_container_union(const array_container_t *src_1,
- const run_container_t *src_2,
- run_container_t *dst) {
- if (run_container_is_full(src_2)) {
- run_container_copy(src_2, dst);
- return;
- }
- // TODO: see whether the "2*" is spurious
- run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
- int32_t rlepos = 0;
- int32_t arraypos = 0;
- rle16_t previousrle;
- if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
- previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
- rlepos++;
- } else {
- previousrle =
- run_container_append_value_first(dst, src_1->array[arraypos]);
- arraypos++;
- }
- while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
- if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
- run_container_append(dst, src_2->runs[rlepos], &previousrle);
- rlepos++;
- } else {
- run_container_append_value(dst, src_1->array[arraypos],
- &previousrle);
- arraypos++;
+void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
+ uint32_t max, uint16_t step) {
+ if (step == 0) return; // refuse to crash
+ if ((64 % step) == 0) { // step divides 64
+ uint64_t mask = 0; // construct the repeated mask
+ for (uint32_t value = (min % step); value < 64; value += step) {
+ mask |= ((uint64_t)1 << value);
}
- }
- if (arraypos < src_1->cardinality) {
- while (arraypos < src_1->cardinality) {
- run_container_append_value(dst, src_1->array[arraypos],
- &previousrle);
- arraypos++;
+ uint32_t firstword = min / 64;
+ uint32_t endword = (max - 1) / 64;
+ bitset->cardinality = (max - min + step - 1) / step;
+ if (firstword == endword) {
+ bitset->words[firstword] |=
+ mask & (((~UINT64_C(0)) << (min % 64)) &
+ ((~UINT64_C(0)) >> ((~max + 1) % 64)));
+ return;
}
+ bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
+ for (uint32_t i = firstword + 1; i < endword; i++)
+ bitset->words[i] = mask;
+ bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
} else {
- while (rlepos < src_2->n_runs) {
- run_container_append(dst, src_2->runs[rlepos], &previousrle);
- rlepos++;
+ for (uint32_t value = min; value < max; value += step) {
+ bitset_container_add(bitset, value);
}
}
}
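+/*
+ * Illustrative sketch (not part of the upstream CRoaring sources): when the
+ * step divides 64, a single repeated mask is stamped across the words instead
+ * of adding values one by one. Hypothetical helper, disabled block.
+ */
+#if 0
+static void example_add_from_range(void) {
+    bitset_container_t *b = bitset_container_create();
+    /* step 8 divides 64: values 2, 10, 18, ..., 122 are set in one pass */
+    bitset_container_add_from_range(b, 2, 130, 8);
+    /* b->cardinality == (130 - 2 + 8 - 1) / 8 == 16 */
+    bitset_container_free(b);
+}
+#endif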
-static void array_run_container_inplace_union(const array_container_t *src_1,
- run_container_t *src_2) {
- if (run_container_is_full(src_2)) {
- return;
+/* Free memory. */
+void bitset_container_free(bitset_container_t *bitset) {
+ if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise
+ roaring_aligned_free(bitset->words);
+ bitset->words = NULL; // pedantic
}
- const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
- const int32_t neededcapacity = maxoutput + src_2->n_runs;
- if (src_2->capacity < neededcapacity)
- run_container_grow(src_2, neededcapacity, true);
- memmove(src_2->runs + maxoutput, src_2->runs,
- src_2->n_runs * sizeof(rle16_t));
- rle16_t *inputsrc2 = src_2->runs + maxoutput;
- int32_t rlepos = 0;
- int32_t arraypos = 0;
- int src2nruns = src_2->n_runs;
- src_2->n_runs = 0;
+ roaring_free(bitset);
+}
- rle16_t previousrle;
+/* duplicate container. */
+bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
+ bitset_container_t *bitset =
+ (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
- if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
- previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
- rlepos++;
- } else {
- previousrle =
- run_container_append_value_first(src_2, src_1->array[arraypos]);
- arraypos++;
+ if (!bitset) {
+ return NULL;
+ }
+ // sizeof(__m256i) == 32
+ bitset->words = (uint64_t *)roaring_aligned_malloc(
+ 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ if (!bitset->words) {
+ roaring_free(bitset);
+ return NULL;
}
+ bitset->cardinality = src->cardinality;
+ memcpy(bitset->words, src->words,
+ sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ return bitset;
+}
- while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
- if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
- run_container_append(src_2, inputsrc2[rlepos], &previousrle);
- rlepos++;
+void bitset_container_offset(const bitset_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset) {
+ bitset_container_t *bc = NULL;
+ uint64_t val;
+ uint16_t b, i, end;
+
+ b = offset >> 6;
+ i = offset % 64;
+ end = 1024 - b;
+
+ if (loc != NULL) {
+ bc = bitset_container_create();
+ if (i == 0) {
+ memcpy(bc->words+b, c->words, 8*end);
} else {
- run_container_append_value(src_2, src_1->array[arraypos],
- &previousrle);
- arraypos++;
+ bc->words[b] = c->words[0] << i;
+ for (uint32_t k = 1; k < end; ++k) {
+ val = c->words[k] << i;
+ val |= c->words[k-1] >> (64 - i);
+ bc->words[b+k] = val;
+ }
}
- }
- if (arraypos < src_1->cardinality) {
- while (arraypos < src_1->cardinality) {
- run_container_append_value(src_2, src_1->array[arraypos],
- &previousrle);
- arraypos++;
+
+ bc->cardinality = bitset_container_compute_cardinality(bc);
+ if (bc->cardinality != 0) {
+ *loc = bc;
}
- } else {
- while (rlepos < src2nruns) {
- run_container_append(src_2, inputsrc2[rlepos], &previousrle);
- rlepos++;
+ if (bc->cardinality == c->cardinality) {
+ return;
}
}
-}
-static bool array_array_container_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
- int totalCardinality = src_1->cardinality + src_2->cardinality;
- if (totalCardinality <= DEFAULT_MAX_SIZE) {
- *dst = array_container_create_given_capacity(totalCardinality);
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
+ if (hic == NULL) {
+        // hic and loc cannot both be NULL, so bc was allocated above
+ if (bc->cardinality == 0) {
+ bitset_container_free(bc);
+ }
+ return;
}
- *dst = bitset_container_create();
- bool returnval = true; // expect a bitset
- if (*dst != NULL) {
- bitset_container_t *ourbitset = CAST_bitset(*dst);
- bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
- ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
- ourbitset->words, src_1->cardinality, src_2->array,
- src_2->cardinality);
- if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
- // need to convert!
- *dst = array_container_from_bitset(ourbitset);
- bitset_container_free(ourbitset);
- returnval = false; // not going to be a bitset
- }
+
+ if (bc == NULL || bc->cardinality != 0) {
+ bc = bitset_container_create();
}
- return returnval;
-}
-static bool array_array_container_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
- int totalCardinality = src_1->cardinality + src_2->cardinality;
- *dst = NULL;
- if (totalCardinality <= DEFAULT_MAX_SIZE) {
- if(src_1->capacity < totalCardinality) {
- *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
- } else {
- memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
- src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
- src_2->array, src_2->cardinality, src_1->array);
- return false; // not a bitset
+ if (i == 0) {
+ memcpy(bc->words, c->words+end, 8*b);
+ } else {
+ for (uint32_t k = end; k < 1024; ++k) {
+ val = c->words[k] << i;
+ val |= c->words[k-1] >> (64 - i);
+ bc->words[k-end] = val;
}
+ bc->words[b] = c->words[1023] >> (64 - i);
}
- *dst = bitset_container_create();
- bool returnval = true; // expect a bitset
- if (*dst != NULL) {
- bitset_container_t *ourbitset = CAST_bitset(*dst);
- bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
- ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
- ourbitset->words, src_1->cardinality, src_2->array,
- src_2->cardinality);
- if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
- // need to convert!
- if(src_1->capacity < ourbitset->cardinality) {
- array_container_grow(src_1, ourbitset->cardinality, false);
- }
- bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS,
- src_1->array, 0);
- src_1->cardinality = ourbitset->cardinality;
- *dst = src_1;
- bitset_container_free(ourbitset);
- returnval = false; // not going to be a bitset
- }
+ bc->cardinality = bitset_container_compute_cardinality(bc);
+ if (bc->cardinality == 0) {
+ bitset_container_free(bc);
+ return;
}
- return returnval;
+ *hic = bc;
}
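+/*
+ * Illustrative sketch (not part of the upstream CRoaring sources): offsetting
+ * a bitset container shifts its words and splits values that overflow 2^16
+ * into the "high" container. Hypothetical helper, disabled block.
+ */
+#if 0
+static void example_bitset_offset(void) {
+    bitset_container_t *c = bitset_container_create();
+    container_t *lo = NULL, *hi = NULL;
+    bitset_container_set(c, 0);
+    bitset_container_set(c, 65535);
+    bitset_container_offset(c, &lo, &hi, 1);
+    /* lo holds {1}; hi holds {0}, i.e. 65535 + 1 carried into the next block */
+    bitset_container_free(c);
+}
+#endif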
-
-static bool array_array_container_lazy_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
- int totalCardinality = src_1->cardinality + src_2->cardinality;
- if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
- *dst = array_container_create_given_capacity(totalCardinality);
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
- }
- *dst = bitset_container_create();
- bool returnval = true; // expect a bitset
- if (*dst != NULL) {
- bitset_container_t *ourbitset = CAST_bitset(*dst);
- bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
- bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality);
- ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
- }
- return returnval;
+void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
+ uint32_t end) {
+ bitset_set_range(bitset->words, begin, end);
+ bitset->cardinality =
+ bitset_container_compute_cardinality(bitset); // could be smarter
}
-static bool array_array_container_lazy_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
- int totalCardinality = src_1->cardinality + src_2->cardinality;
- *dst = NULL;
- if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
- if(src_1->capacity < totalCardinality) {
- *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
- } else {
- memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
- src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
- src_2->array, src_2->cardinality, src_1->array);
- return false; // not a bitset
- }
- }
- *dst = bitset_container_create();
- bool returnval = true; // expect a bitset
- if (*dst != NULL) {
- bitset_container_t *ourbitset = CAST_bitset(*dst);
- bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
- bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality);
- ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
+bool bitset_container_intersect(const bitset_container_t *src_1,
+ const bitset_container_t *src_2) {
+ // could vectorize, but this is probably already quite fast in practice
+ const uint64_t * __restrict__ words_1 = src_1->words;
+ const uint64_t * __restrict__ words_2 = src_2->words;
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
+ if((words_1[i] & words_2[i]) != 0) return true;
}
- return returnval;
+ return false;
}
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif
-/* end file src/containers/mixed_union.c */
-/* begin file src/containers/convert.c */
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+#ifdef CROARING_IS_X64
+#ifndef WORDS_IN_AVX2_REG
+#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
#endif
-
-// file contains grubby stuff that must know impl. details of all container
-// types.
-static bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
- bitset_container_t *ans = bitset_container_create();
- int limit = array_container_cardinality(ac);
- int i = 0; for (i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
- return ans;
+/* Get the number of bits set (force computation) */
+static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+ const uint64_t *words = bitset->words;
+ int32_t sum = 0;
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
+ sum += hamming(words[i]);
+ sum += hamming(words[i + 1]);
+ sum += hamming(words[i + 2]);
+ sum += hamming(words[i + 3]);
+ }
+ return sum;
}
+/* Get the number of bits set (force computation) */
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+ if( croaring_avx2() ) {
+ return (int) avx2_harley_seal_popcount256(
+ (const __m256i *)bitset->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
+ } else {
+ return _scalar_bitset_container_compute_cardinality(bitset);
-static bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
- int card = run_container_cardinality(arr);
- bitset_container_t *answer = bitset_container_create();
- int rlepos; for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
- rle16_t vl = arr->runs[rlepos];
- bitset_set_lenrange(answer->words, vl.value, vl.length);
}
- answer->cardinality = card;
- return answer;
}
-static array_container_t *array_container_from_run(const run_container_t *arr) {
- array_container_t *answer =
- array_container_create_given_capacity(run_container_cardinality(arr));
- answer->cardinality = 0;
- int rlepos; for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
- int run_start = arr->runs[rlepos].value;
- int run_end = run_start + arr->runs[rlepos].length;
-
- int run_value; for (run_value = run_start; run_value <= run_end; ++run_value) {
- answer->array[answer->cardinality++] = (uint16_t)run_value;
- }
+#elif defined(USENEON)
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+ uint16x8_t n0 = vdupq_n_u16(0);
+ uint16x8_t n1 = vdupq_n_u16(0);
+ uint16x8_t n2 = vdupq_n_u16(0);
+ uint16x8_t n3 = vdupq_n_u16(0);
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {
+ uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]);
+ n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));
+ uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]);
+ n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1))));
+ uint64x2_t c2 = vld1q_u64(&bitset->words[i + 4]);
+ n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2))));
+ uint64x2_t c3 = vld1q_u64(&bitset->words[i + 6]);
+ n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3))));
}
- return answer;
-}
-
-static array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
- array_container_t *result =
- array_container_create_given_capacity(bits->cardinality);
- result->cardinality = bits->cardinality;
- // sse version ends up being slower here
- // (bitset_extract_setbits_sse_uint16)
- // because of the sparsity of the data
- bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
- result->array, 0);
- return result;
-}
-
-/* assumes that container has adequate space. Run from [s,e] (inclusive) */
-static void add_run(run_container_t *rc, int s, int e) {
- rc->runs[rc->n_runs].value = s;
- rc->runs[rc->n_runs].length = e - s;
- rc->n_runs++;
+ uint64x2_t n = vdupq_n_u64(0);
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0)));
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1)));
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2)));
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3)));
+ return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);
}
-static run_container_t *run_container_from_array(const array_container_t *c) {
- int32_t n_runs = array_container_number_of_runs(c);
- run_container_t *answer = run_container_create_given_capacity(n_runs);
- int prev = -2;
- int run_start = -1;
- int32_t card = c->cardinality;
- if (card == 0) return answer;
- int i = 0; for (i = 0; i < card; ++i) {
- const uint16_t cur_val = c->array[i];
- if (cur_val != prev + 1) {
- // new run starts; flush old one, if any
- if (run_start != -1) add_run(answer, run_start, prev);
- run_start = cur_val;
- }
- prev = c->array[i];
- }
- // now prev is the last seen value
- add_run(answer, run_start, prev);
- // assert(run_container_cardinality(answer) == c->cardinality);
- return answer;
-}
+#else // CROARING_IS_X64
-/**
- * Convert the runcontainer to either a Bitmap or an Array Container, depending
- * on the cardinality. Frees the container.
- * Allocates and returns new container, which caller is responsible for freeing.
- * It does not free the run container.
- */
-static container_t *convert_to_bitset_or_array_container(
- run_container_t *rc, int32_t card,
- uint8_t *resulttype
-){
- if (card <= DEFAULT_MAX_SIZE) {
- array_container_t *answer = array_container_create_given_capacity(card);
- answer->cardinality = 0;
- int rlepos; for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
- uint16_t run_start = rc->runs[rlepos].value;
- uint16_t run_end = run_start + rc->runs[rlepos].length;
- uint16_t run_value; for (run_value = run_start; run_value <= run_end;
- ++run_value) {
- answer->array[answer->cardinality++] = run_value;
- }
- }
- assert(card == answer->cardinality);
- *resulttype = ARRAY_CONTAINER_TYPE;
- //run_container_free(r);
- return answer;
- }
- bitset_container_t *answer = bitset_container_create();
- int rlepos; for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
- uint16_t run_start = rc->runs[rlepos].value;
- bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
+/* Get the number of bits set (force computation) */
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+ const uint64_t *words = bitset->words;
+ int32_t sum = 0;
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
+ sum += hamming(words[i]);
+ sum += hamming(words[i + 1]);
+ sum += hamming(words[i + 2]);
+ sum += hamming(words[i + 3]);
}
- answer->cardinality = card;
- *resulttype = BITSET_CONTAINER_TYPE;
- //run_container_free(r);
- return answer;
+ return sum;
}
-/* Converts a run container to either an array or a bitset, IF it saves space.
- */
-/* If a conversion occurs, the caller is responsible to free the original
- * container and
- * he becomes responsible to free the new one. */
-static container_t *convert_run_to_efficient_container(
- run_container_t *c,
- uint8_t *typecode_after
-){
- int32_t size_as_run_container =
- run_container_serialized_size_in_bytes(c->n_runs);
+#endif // CROARING_IS_X64
- int32_t size_as_bitset_container =
- bitset_container_serialized_size_in_bytes();
- int32_t card = run_container_cardinality(c);
- int32_t size_as_array_container =
- array_container_serialized_size_in_bytes(card);
+#ifdef CROARING_IS_X64
- int32_t min_size_non_run =
- size_as_bitset_container < size_as_array_container
- ? size_as_bitset_container
- : size_as_array_container;
- if (size_as_run_container <= min_size_non_run) { // no conversion
- *typecode_after = RUN_CONTAINER_TYPE;
- return c;
- }
- if (card <= DEFAULT_MAX_SIZE) {
- // to array
- array_container_t *answer = array_container_create_given_capacity(card);
- answer->cardinality = 0;
- int rlepos; for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
- int run_start = c->runs[rlepos].value;
- int run_end = run_start + c->runs[rlepos].length;
+#define BITSET_CONTAINER_FN_REPEAT 8
+#ifndef WORDS_IN_AVX2_REG
+#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
+#endif // WORDS_IN_AVX2_REG
+#define LOOP_SIZE \
+ BITSET_CONTAINER_SIZE_IN_WORDS / \
+ ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT)
- int run_value; for (run_value = run_start; run_value <= run_end; ++run_value) {
- answer->array[answer->cardinality++] = (uint16_t)run_value;
- }
- }
- *typecode_after = ARRAY_CONTAINER_TYPE;
- return answer;
- }
+/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
+ result to bitsetout */
+// clang-format off
+#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ static inline int _avx2_bitset_container_##opname##_nocard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \
+ const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \
+ /* not using the blocking optimization for some reason*/ \
+ uint8_t *out = (uint8_t *)dst->words; \
+ const int innerloop = 8; \
+ for (size_t i = 0; \
+ i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \
+ i += innerloop) { \
+ __m256i A1, A2, AO; \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)out, AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 32), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 64), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 96), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 128), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 160), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 192), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 224), AO); \
+ out += 256; \
+ words_1 += 256; \
+ words_2 += 256; \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+ }
- // else to bitset
- bitset_container_t *answer = bitset_container_create();
+#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ /* next, a version that updates cardinality*/ \
+ static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \
+ const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \
+ __m256i *out = (__m256i *)dst->words; \
+ dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \
+ words_2, words_1, out, \
+ BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
+ return dst->cardinality; \
+ } \
- int rlepos; for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
- int start = c->runs[rlepos].value;
- int end = start + c->runs[rlepos].length;
- bitset_set_range(answer->words, start, end + 1);
- }
- answer->cardinality = card;
- *typecode_after = BITSET_CONTAINER_TYPE;
- return answer;
-}
+#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ /* next, a version that just computes the cardinality*/ \
+ static inline int _avx2_bitset_container_##opname##_justcard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2) { \
+ const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \
+ const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \
+ return (int)avx2_harley_seal_popcount256_##opname( \
+ data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
+ }
-// like convert_run_to_efficient_container but frees the old result if needed
-static container_t *convert_run_to_efficient_container_and_free(
- run_container_t *c,
- uint8_t *typecode_after
-){
- container_t *answer = convert_run_to_efficient_container(c, typecode_after);
- if (answer != c) run_container_free(c);
- return answer;
-}
-/* once converted, the original container is disposed here, rather than
- in roaring_array
-*/
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
-// TODO: split into run- array- and bitset- subfunctions for sanity;
-// a few function calls won't really matter.
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
-static container_t *convert_run_optimize(
- container_t *c, uint8_t typecode_original,
- uint8_t *typecode_after
-){
- if (typecode_original == RUN_CONTAINER_TYPE) {
- container_t *newc = convert_run_to_efficient_container(
- CAST_run(c), typecode_after);
- if (newc != c) {
- container_free(c, typecode_original);
- }
- return newc;
- } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
- // it might need to be converted to a run container.
- array_container_t *c_qua_array = CAST_array(c);
- int32_t n_runs = array_container_number_of_runs(c_qua_array);
- int32_t size_as_run_container =
- run_container_serialized_size_in_bytes(n_runs);
- int32_t card = array_container_cardinality(c_qua_array);
- int32_t size_as_array_container =
- array_container_serialized_size_in_bytes(card);
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- if (size_as_run_container >= size_as_array_container) {
- *typecode_after = ARRAY_CONTAINER_TYPE;
- return c;
- }
- // else convert array to run container
- run_container_t *answer = run_container_create_given_capacity(n_runs);
- int prev = -2;
- int run_start = -1;
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- assert(card > 0);
- int i = 0; for (i = 0; i < card; ++i) {
- uint16_t cur_val = c_qua_array->array[i];
- if (cur_val != prev + 1) {
- // new run starts; flush old one, if any
- if (run_start != -1) add_run(answer, run_start, prev);
- run_start = cur_val;
- }
- prev = c_qua_array->array[i];
- }
- assert(run_start >= 0);
- // now prev is the last seen value
- add_run(answer, run_start, prev);
- *typecode_after = RUN_CONTAINER_TYPE;
- array_container_free(c_qua_array);
- return answer;
- } else if (typecode_original ==
- BITSET_CONTAINER_TYPE) { // run conversions on bitset
- // does bitset need conversion to run?
- bitset_container_t *c_qua_bitset = CAST_bitset(c);
- int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
- int32_t size_as_run_container =
- run_container_serialized_size_in_bytes(n_runs);
- int32_t size_as_bitset_container =
- bitset_container_serialized_size_in_bytes();
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- if (size_as_bitset_container <= size_as_run_container) {
- // no conversion needed.
- *typecode_after = BITSET_CONTAINER_TYPE;
- return c;
- }
- // bitset to runcontainer (ported from Java RunContainer(
- // BitmapContainer bc, int nbrRuns))
- assert(n_runs > 0); // no empty bitmaps
- run_container_t *answer = run_container_create_given_capacity(n_runs);
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- int long_ctr = 0;
- uint64_t cur_word = c_qua_bitset->words[0];
- while (true) {
- while (cur_word == UINT64_C(0) &&
- long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
- cur_word = c_qua_bitset->words[++long_ctr];
+// we duplicate the function because other containers use the "or" term, which keeps the API consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- if (cur_word == UINT64_C(0)) {
- bitset_container_free(c_qua_bitset);
- *typecode_after = RUN_CONTAINER_TYPE;
- return answer;
- }
+// we duplicate the function because other containers use the "intersection" term, which keeps the API consistent
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- int local_run_start = __builtin_ctzll(cur_word);
- int run_start = local_run_start + 64 * long_ctr;
- uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
- int run_end = 0;
- while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
- long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
- cur_word_with_1s = c_qua_bitset->words[++long_ctr];
- if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
- run_end = 64 + long_ctr * 64; // exclusive, I guess
- add_run(answer, run_start, run_end - 1);
- bitset_container_free(c_qua_bitset);
- *typecode_after = RUN_CONTAINER_TYPE;
- return answer;
- }
- int local_run_end = __builtin_ctzll(~cur_word_with_1s);
- run_end = local_run_end + long_ctr * 64;
- add_run(answer, run_start, run_end - 1);
- cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
- }
- return answer;
- } else {
- assert(false);
- __builtin_unreachable();
- return NULL;
- }
-}
+#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic) \
+ static inline int _scalar_bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t *__restrict__ words_1 = src_1->words; \
+ const uint64_t *__restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ int32_t sum = 0; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
+ out[i] = word_1; \
+ out[i + 1] = word_2; \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ dst->cardinality = sum; \
+ return dst->cardinality; \
+ } \
+ static inline int _scalar_bitset_container_##opname##_nocard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t *__restrict__ words_1 = src_1->words; \
+ const uint64_t *__restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
+ out[i] = (words_1[i])opsymbol(words_2[i]); \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+ } \
+ static inline int _scalar_bitset_container_##opname##_justcard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2) { \
+ const uint64_t *__restrict__ words_1 = src_1->words; \
+ const uint64_t *__restrict__ words_2 = src_2->words; \
+ int32_t sum = 0; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ return sum; \
+ }
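
For orientation, the scalar kernels generated by this macro combine the two word arrays with the given operator and, in the cardinality-tracking variant, accumulate a population count on the fly. A rough standalone sketch of that pattern, using the GCC/Clang __builtin_popcountll builtin in place of the library's hamming() and an arbitrary array size:

    #include <stdint.h>
    #include <stdio.h>

    #define WORDS 16  /* stand-in for BITSET_CONTAINER_SIZE_IN_WORDS */

    /* OR two word arrays and return the resulting cardinality. */
    static int or_with_cardinality(const uint64_t *a, const uint64_t *b, uint64_t *out) {
        int sum = 0;
        for (int i = 0; i < WORDS; i++) {
            out[i] = a[i] | b[i];
            sum += __builtin_popcountll(out[i]);  /* hamming() in the library */
        }
        return sum;
    }

    int main(void) {
        uint64_t a[WORDS] = { 0xF0, 0x1 }, b[WORDS] = { 0x0F, 0x2 }, out[WORDS];
        printf("cardinality=%d\n", or_with_cardinality(a, b, out));  /* 10 */
        return 0;
    }
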
-static container_t *container_from_run_range(
- const run_container_t *run,
- uint32_t min, uint32_t max, uint8_t *typecode_after
-){
- // We expect most of the time to end up with a bitset container
- bitset_container_t *bitset = bitset_container_create();
- *typecode_after = BITSET_CONTAINER_TYPE;
- int32_t union_cardinality = 0;
- int32_t i; for (i = 0; i < run->n_runs; ++i) {
- uint32_t rle_min = run->runs[i].value;
- uint32_t rle_max = rle_min + run->runs[i].length;
- bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min);
- union_cardinality += run->runs[i].length + 1;
- }
- union_cardinality += max - min + 1;
- union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min);
- bitset_set_lenrange(bitset->words, min, max - min);
- bitset->cardinality = union_cardinality;
- if(bitset->cardinality <= DEFAULT_MAX_SIZE) {
- // we need to convert to an array container
- array_container_t * array = array_container_from_bitset(bitset);
- *typecode_after = ARRAY_CONTAINER_TYPE;
- bitset_container_free(bitset);
- return array;
- }
- return bitset;
-}
+// we duplicate the function because other containers use the "or" term, which keeps the API consistent
+SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
+SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif
-/* end file src/containers/convert.c */
-/* begin file src/containers/run.c */
-#include <stdio.h>
-#include <stdlib.h>
+// we duplicate the function because other containers use the "intersection" term, which keeps the API consistent
+SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
+SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
+SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
+SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
-#endif
-extern inline uint16_t run_container_minimum(const run_container_t *run);
-extern inline uint16_t run_container_maximum(const run_container_t *run);
-extern inline int32_t interleavedBinarySearch(const rle16_t *array,
- int32_t lenarray, uint16_t ikey);
-extern inline bool run_container_contains(const run_container_t *run,
- uint16_t pos);
-extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
-extern inline bool run_container_is_full(const run_container_t *run);
-extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
-extern inline void run_container_clear(run_container_t *run);
-extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
-extern inline run_container_t *run_container_create_range(uint32_t start,
- uint32_t stop);
-extern inline int run_container_cardinality(const run_container_t *run);
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
+ int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ if ( croaring_avx2() ) { \
+ return _avx2_bitset_container_##opname(src_1, src_2, dst); \
+ } else { \
+ return _scalar_bitset_container_##opname(src_1, src_2, dst); \
+ } \
+ } \
+ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ if ( croaring_avx2() ) { \
+ return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+ } else { \
+ return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+ } \
+ } \
+ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ if ((croaring_detect_supported_architectures() & CROARING_AVX2) == \
+ CROARING_AVX2) { \
+ return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \
+ } else { \
+ return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \
+ } \
+ }
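
These wrappers pick the AVX2 or scalar kernel at run time. A self-contained sketch of the same dispatch idea follows; the probe and kernel names here are illustrative only, while the library itself relies on croaring_avx2() / croaring_detect_supported_architectures():

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for the generated _avx2_ / _scalar_ kernels. */
    static int kernel_scalar(int x) { return x + 1; }
    static int kernel_fast(int x)   { return x + 1; }  /* same result, faster path */

    static bool cpu_has_fast_path(void) {
    #if defined(__AVX2__)
        return true;   /* compile-time knowledge here; a real probe would query the CPU */
    #else
        return false;
    #endif
    }

    static int dispatched(int x) {
        return cpu_has_fast_path() ? kernel_fast(x) : kernel_scalar(x);
    }

    int main(void) {
        printf("%d\n", dispatched(41));  /* 42 on either path */
        return 0;
    }
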
-static bool run_container_add(run_container_t *run, uint16_t pos) {
- int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
- if (index >= 0) return false; // already there
- index = -index - 2; // points to preceding value, possibly -1
- if (index >= 0) { // possible match
- int32_t offset = pos - run->runs[index].value;
- int32_t le = run->runs[index].length;
- if (offset <= le) return false; // already there
- if (offset == le + 1) {
- // we may need to fuse
- if (index + 1 < run->n_runs) {
- if (run->runs[index + 1].value == pos + 1) {
- // indeed fusion is needed
- run->runs[index].length = run->runs[index + 1].value +
- run->runs[index + 1].length -
- run->runs[index].value;
- recoverRoomAtIndex(run, (uint16_t)(index + 1));
- return true;
- }
- }
- run->runs[index].length++;
- return true;
- }
- if (index + 1 < run->n_runs) {
- // we may need to fuse
- if (run->runs[index + 1].value == pos + 1) {
- // indeed fusion is needed
- run->runs[index + 1].value = pos;
- run->runs[index + 1].length = run->runs[index + 1].length + 1;
- return true;
- }
- }
- }
- if (index == -1) {
- // we may need to extend the first run
- if (0 < run->n_runs) {
- if (run->runs[0].value == pos + 1) {
- run->runs[0].length++;
- run->runs[0].value--;
- return true;
- }
- }
- }
- makeRoomAtIndex(run, (uint16_t)(index + 1));
- run->runs[index + 1].value = pos;
- run->runs[index + 1].length = 0;
- return true;
-}
-/* Create a new run container. Return NULL in case of failure. */
-static run_container_t *run_container_create_given_capacity(int32_t size) {
- run_container_t *run;
- /* Allocate the run container itself. */
- if ((run = (run_container_t *)ndpi_malloc(sizeof(run_container_t))) == NULL) {
- return NULL;
- }
- if (size <= 0 ) { // we don't want to rely on malloc(0)
- run->runs = NULL;
- } else if ((run->runs = (rle16_t *)ndpi_malloc(sizeof(rle16_t) * size)) == NULL) {
- ndpi_free(run);
- return NULL;
- }
- run->capacity = size;
- run->n_runs = 0;
- return run;
-}
+#elif defined(USENEON)
-static int run_container_shrink_to_fit(run_container_t *src) {
- if (src->n_runs == src->capacity) return 0; // nothing to do
- int savings = src->capacity - src->n_runs;
- int old_capacity = src->capacity;
- src->capacity = src->n_runs;
- rle16_t *oldruns = src->runs;
- src->runs = (rle16_t *)ndpi_realloc(oldruns, old_capacity * sizeof(rle16_t), src->capacity * sizeof(rle16_t));
- if (src->runs == NULL) ndpi_free(oldruns); // should never happen?
- return savings;
-}
-/* Create a new run container. Return NULL in case of failure. */
-static run_container_t *run_container_create(void) {
- return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
+int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ uint16x8_t n0 = vdupq_n_u16(0); \
+ uint16x8_t n1 = vdupq_n_u16(0); \
+ uint16x8_t n2 = vdupq_n_u16(0); \
+ uint16x8_t n3 = vdupq_n_u16(0); \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0])); \
+ n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
+ vst1q_u64(&out[i + 0], c0); \
+ uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2])); \
+ n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
+ vst1q_u64(&out[i + 2], c1); \
+ uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4])); \
+ n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
+ vst1q_u64(&out[i + 4], c2); \
+ uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6])); \
+ n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
+ vst1q_u64(&out[i + 6], c3); \
+ } \
+ uint64x2_t n = vdupq_n_u64(0); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
+ dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0]))); \
+ vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2]))); \
+ vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4]))); \
+ vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6]))); \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint16x8_t n0 = vdupq_n_u16(0); \
+ uint16x8_t n1 = vdupq_n_u16(0); \
+ uint16x8_t n2 = vdupq_n_u16(0); \
+ uint16x8_t n3 = vdupq_n_u16(0); \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0])); \
+ n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
+ uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2])); \
+ n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
+ uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4])); \
+ n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
+ uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6])); \
+ n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
+ } \
+ uint64x2_t n = vdupq_n_u64(0); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
+ return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \
}
-static run_container_t *run_container_clone(const run_container_t *src) {
- run_container_t *run = run_container_create_given_capacity(src->capacity);
- if (run == NULL) return NULL;
- run->capacity = src->capacity;
- run->n_runs = src->n_runs;
- memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
- return run;
-}
+#else
-/* Free memory. */
-static void run_container_free(run_container_t *run) {
- if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
- ndpi_free(run->runs);
- run->runs = NULL; // pedantic
- }
- ndpi_free(run);
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
+int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ int32_t sum = 0; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
+ out[i] = word_1; \
+ out[i + 1] = word_2; \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ dst->cardinality = sum; \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
+ out[i] = (words_1[i])opsymbol(words_2[i]); \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ int32_t sum = 0; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ return sum; \
}
-static void run_container_grow(run_container_t *run, int32_t min, bool copy) {
- int32_t newCapacity =
- (run->capacity == 0)
- ? RUN_DEFAULT_INIT_SIZE
- : run->capacity < 64 ? run->capacity * 2
- : run->capacity < 1024 ? run->capacity * 3 / 2
- : run->capacity * 5 / 4;
- int32_t old_capacity = run->capacity;
- if (newCapacity < min) newCapacity = min;
- run->capacity = newCapacity;
- assert(run->capacity >= min);
- if (copy) {
- rle16_t *oldruns = run->runs;
- run->runs =
- (rle16_t *)ndpi_realloc(oldruns, old_capacity * sizeof(rle16_t), run->capacity * sizeof(rle16_t));
- if (run->runs == NULL) ndpi_free(oldruns);
- } else {
- // Jon Strabala reports that some tools complain otherwise
- if (run->runs != NULL) {
- ndpi_free(run->runs);
- }
- run->runs = (rle16_t *)ndpi_malloc(run->capacity * sizeof(rle16_t));
- }
- // handle the case where realloc fails
- if (run->runs == NULL) {
- fprintf(stderr, "could not allocate memory\n");
- }
- assert(run->runs != NULL);
-}
+#endif // CROARING_IS_X64
-/* copy one container into another */
-static void run_container_copy(const run_container_t *src, run_container_t *dst) {
- const int32_t n_runs = src->n_runs;
- if (src->n_runs > dst->capacity) {
- run_container_grow(dst, n_runs, false);
- }
- dst->n_runs = n_runs;
- memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
-}
+// we duplicate the function because other containers use the "or" term, which keeps the API consistent
+BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
+BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
-/* Compute the union of `src_1' and `src_2' and write the result to `dst'
- * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
-static void run_container_union(const run_container_t *src_1,
- const run_container_t *src_2, run_container_t *dst) {
- // TODO: this could be a lot more efficient
+// we duplicate the function because other containers use the "intersection" term, which keeps the API consistent
+BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
+BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
- // we start out with inexpensive checks
- const bool if1 = run_container_is_full(src_1);
- const bool if2 = run_container_is_full(src_2);
- if (if1 || if2) {
- if (if1) {
- run_container_copy(src_1, dst);
- return;
- }
- if (if2) {
- run_container_copy(src_2, dst);
- return;
- }
- }
- const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
- if (dst->capacity < neededcapacity)
- run_container_grow(dst, neededcapacity, false);
- dst->n_runs = 0;
- int32_t rlepos = 0;
- int32_t xrlepos = 0;
+BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
+BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
+// clang-format on
- rle16_t previousrle;
- if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
- previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
- rlepos++;
- } else {
- previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
- xrlepos++;
- }
- while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
- rle16_t newrl;
- if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
- newrl = src_1->runs[rlepos];
- rlepos++;
- } else {
- newrl = src_2->runs[xrlepos];
- xrlepos++;
- }
- run_container_append(dst, newrl, &previousrle);
- }
- while (xrlepos < src_2->n_runs) {
- run_container_append(dst, src_2->runs[xrlepos], &previousrle);
- xrlepos++;
- }
- while (rlepos < src_1->n_runs) {
- run_container_append(dst, src_1->runs[rlepos], &previousrle);
- rlepos++;
- }
+ALLOW_UNALIGNED
+int bitset_container_to_uint32_array(
+ uint32_t *out,
+ const bitset_container_t *bc,
+ uint32_t base
+){
+#ifdef CROARING_IS_X64
+ if(( croaring_avx2() ) && (bc->cardinality >= 8192)) // heuristic
+ return (int) bitset_extract_setbits_avx2(bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
+ else
+ return (int) bitset_extract_setbits(bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
+#else
+ return (int) bitset_extract_setbits(bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
+#endif
}
-/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
+/*
+ * Print this container using printf (useful for debugging).
*/
-static void run_container_union_inplace(run_container_t *src_1,
- const run_container_t *src_2) {
- // TODO: this could be a lot more efficient
-
- // we start out with inexpensive checks
- const bool if1 = run_container_is_full(src_1);
- const bool if2 = run_container_is_full(src_2);
- if (if1 || if2) {
- if (if1) {
- return;
- }
- if (if2) {
- run_container_copy(src_2, src_1);
- return;
- }
- }
- // we move the data to the end of the current array
- const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
- const int32_t neededcapacity = maxoutput + src_1->n_runs;
- if (src_1->capacity < neededcapacity)
- run_container_grow(src_1, neededcapacity, true);
- memmove(src_1->runs + maxoutput, src_1->runs,
- src_1->n_runs * sizeof(rle16_t));
- rle16_t *inputsrc1 = src_1->runs + maxoutput;
- const int32_t input1nruns = src_1->n_runs;
- src_1->n_runs = 0;
- int32_t rlepos = 0;
- int32_t xrlepos = 0;
-
- rle16_t previousrle;
- if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
- previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
- rlepos++;
- } else {
- previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
- xrlepos++;
- }
- while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
- rle16_t newrl;
- if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
- newrl = inputsrc1[rlepos];
- rlepos++;
- } else {
- newrl = src_2->runs[xrlepos];
- xrlepos++;
- }
- run_container_append(src_1, newrl, &previousrle);
- }
- while (xrlepos < src_2->n_runs) {
- run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
- xrlepos++;
- }
- while (rlepos < input1nruns) {
- run_container_append(src_1, inputsrc1[rlepos], &previousrle);
- rlepos++;
- }
+void bitset_container_printf(const bitset_container_t * v) {
+ printf("{");
+ uint32_t base = 0;
+ bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ uint64_t w = v->words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ if(iamfirst) {// predicted to be false
+ printf("%u",base + r);
+ iamfirst = false;
+ } else {
+ printf(",%u",base + r);
+ }
+ w ^= t;
+ }
+ base += 64;
+ }
+ printf("}");
}
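
The loop above walks the set bits of each word by isolating the lowest one with w & (~w + 1), taking its index with a count-trailing-zeros, and clearing it with an XOR. A minimal standalone illustration of that bit-iteration trick, assuming the GCC/Clang __builtin_ctzll builtin:

    #include <stdint.h>
    #include <stdio.h>

    /* Print the indexes of the set bits of w, lowest first. */
    static void print_set_bits(uint64_t w) {
        while (w != 0) {
            uint64_t lowest = w & (~w + 1);      /* isolate the lowest set bit */
            printf("%d ", __builtin_ctzll(w));   /* its index */
            w ^= lowest;                         /* clear it and continue */
        }
        printf("\n");
    }

    int main(void) {
        print_set_bits(0x89);  /* prints: 0 3 7 */
        return 0;
    }
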
-/* Compute the symmetric difference of `src_1' and `src_2' and write the result
- * to `dst'
- * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
-static void run_container_xor(const run_container_t *src_1,
- const run_container_t *src_2, run_container_t *dst) {
- // don't bother to convert xor with full range into negation
- // since negation is implemented similarly
-
- const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
- if (dst->capacity < neededcapacity)
- run_container_grow(dst, neededcapacity, false);
-
- int32_t pos1 = 0;
- int32_t pos2 = 0;
- dst->n_runs = 0;
-
- while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
- if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
- run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
- src_1->runs[pos1].length);
- pos1++;
- } else {
- run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
- src_2->runs[pos2].length);
- pos2++;
- }
- }
- while (pos1 < src_1->n_runs) {
- run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
- src_1->runs[pos1].length);
- pos1++;
- }
- while (pos2 < src_2->n_runs) {
- run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
- src_2->runs[pos2].length);
- pos2++;
- }
+/*
+ * Print this container using printf as a comma-separated list of 32-bit integers starting at base.
+ */
+void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
+ bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ uint64_t w = v->words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ if(iamfirst) {// predicted to be false
+ printf("%u", r + base);
+ iamfirst = false;
+ } else {
+ printf(",%u",r + base);
+ }
+ w ^= t;
+ }
+ base += 64;
+ }
}
-/* Compute the intersection of src_1 and src_2 and write the result to
- * dst. It is assumed that dst is distinct from both src_1 and src_2. */
-static void run_container_intersection(const run_container_t *src_1,
- const run_container_t *src_2,
- run_container_t *dst) {
- const bool if1 = run_container_is_full(src_1);
- const bool if2 = run_container_is_full(src_2);
- if (if1 || if2) {
- if (if1) {
- run_container_copy(src_2, dst);
- return;
- }
- if (if2) {
- run_container_copy(src_1, dst);
- return;
- }
- }
- // TODO: this could be a lot more efficient, could use SIMD optimizations
- const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
- if (dst->capacity < neededcapacity)
- run_container_grow(dst, neededcapacity, false);
- dst->n_runs = 0;
- int32_t rlepos = 0;
- int32_t xrlepos = 0;
- int32_t start = src_1->runs[rlepos].value;
- int32_t end = start + src_1->runs[rlepos].length + 1;
- int32_t xstart = src_2->runs[xrlepos].value;
- int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
- while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
- if (end <= xstart) {
- ++rlepos;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
- } else if (xend <= start) {
- ++xrlepos;
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- } else { // they overlap
- const int32_t lateststart = start > xstart ? start : xstart;
- int32_t earliestend;
- if (end == xend) { // improbable
- earliestend = end;
- rlepos++;
- xrlepos++;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- } else if (end < xend) {
- earliestend = end;
- rlepos++;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
-
- } else { // end > xend
- earliestend = xend;
- xrlepos++;
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- }
- dst->runs[dst->n_runs].value = (uint16_t)lateststart;
- dst->runs[dst->n_runs].length =
- (uint16_t)(earliestend - lateststart - 1);
- dst->n_runs++;
- }
- }
-}
-/* Compute the size of the intersection of src_1 and src_2 . */
-static int run_container_intersection_cardinality(const run_container_t *src_1,
- const run_container_t *src_2) {
- const bool if1 = run_container_is_full(src_1);
- const bool if2 = run_container_is_full(src_2);
- if (if1 || if2) {
- if (if1) {
- return run_container_cardinality(src_2);
- }
- if (if2) {
- return run_container_cardinality(src_1);
- }
- }
- int answer = 0;
- int32_t rlepos = 0;
- int32_t xrlepos = 0;
- int32_t start = src_1->runs[rlepos].value;
- int32_t end = start + src_1->runs[rlepos].length + 1;
- int32_t xstart = src_2->runs[xrlepos].value;
- int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
- while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
- if (end <= xstart) {
- ++rlepos;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
- } else if (xend <= start) {
- ++xrlepos;
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- } else { // they overlap
- const int32_t lateststart = start > xstart ? start : xstart;
- int32_t earliestend;
- if (end == xend) { // improbable
- earliestend = end;
- rlepos++;
- xrlepos++;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- } else if (end < xend) {
- earliestend = end;
- rlepos++;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
+// TODO: use the fast lower bound, also
+int bitset_container_number_of_runs(bitset_container_t *bc) {
+ int num_runs = 0;
+ uint64_t next_word = bc->words[0];
- } else { // end > xend
- earliestend = xend;
- xrlepos++;
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- }
- answer += earliestend - lateststart;
- }
- }
- return answer;
-}
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
+ uint64_t word = next_word;
+ next_word = bc->words[i+1];
+ num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
+ }
-static bool run_container_intersect(const run_container_t *src_1,
- const run_container_t *src_2) {
- const bool if1 = run_container_is_full(src_1);
- const bool if2 = run_container_is_full(src_2);
- if (if1 || if2) {
- if (if1) {
- return !run_container_empty(src_2);
- }
- if (if2) {
- return !run_container_empty(src_1);
- }
- }
- int32_t rlepos = 0;
- int32_t xrlepos = 0;
- int32_t start = src_1->runs[rlepos].value;
- int32_t end = start + src_1->runs[rlepos].length + 1;
- int32_t xstart = src_2->runs[xrlepos].value;
- int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
- while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
- if (end <= xstart) {
- ++rlepos;
- if (rlepos < src_1->n_runs) {
- start = src_1->runs[rlepos].value;
- end = start + src_1->runs[rlepos].length + 1;
- }
- } else if (xend <= start) {
- ++xrlepos;
- if (xrlepos < src_2->n_runs) {
- xstart = src_2->runs[xrlepos].value;
- xend = xstart + src_2->runs[xrlepos].length + 1;
- }
- } else { // they overlap
- return true;
- }
- }
- return false;
+ uint64_t word = next_word;
+ num_runs += hamming((~word) & (word << 1));
+ if((word & 0x8000000000000000ULL) != 0)
+ num_runs++;
+ return num_runs;
}
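
Per word, hamming((~word) & (word << 1)) counts the 1-to-0 transitions, i.e. the runs that end inside that word; the extra terms account for runs that end exactly on a word boundary. A small standalone check of the same computation, with an arbitrary array size and test values:

    #include <stdint.h>
    #include <stdio.h>

    #define NWORDS 2

    /* Count maximal runs of consecutive set bits across an array of words. */
    static int count_runs(const uint64_t *words) {
        int num_runs = 0;
        uint64_t next_word = words[0];
        for (int i = 0; i < NWORDS - 1; ++i) {
            uint64_t word = next_word;
            next_word = words[i + 1];
            num_runs += __builtin_popcountll((~word) & (word << 1))
                      + (int)((word >> 63) & ~next_word & 1);
        }
        uint64_t word = next_word;
        num_runs += __builtin_popcountll((~word) & (word << 1));
        if ((word & 0x8000000000000000ULL) != 0) num_runs++;
        return num_runs;
    }

    int main(void) {
        /* word 0 holds runs {1..3} and {5..7}, word 1 holds run {64..65}: 3 runs. */
        uint64_t words[NWORDS] = { 0xEEULL, 0x3ULL };
        printf("%d\n", count_runs(words));  /* 3 */
        return 0;
    }
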
-/* Compute the difference of src_1 and src_2 and write the result to
- * dst. It is assumed that dst is distinct from both src_1 and src_2. */
-static void run_container_andnot(const run_container_t *src_1,
- const run_container_t *src_2, run_container_t *dst) {
- // following Java implementation as of June 2016
-
- if (dst->capacity < src_1->n_runs + src_2->n_runs)
- run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
-
- dst->n_runs = 0;
-
- int rlepos1 = 0;
- int rlepos2 = 0;
- int32_t start = src_1->runs[rlepos1].value;
- int32_t end = start + src_1->runs[rlepos1].length + 1;
- int32_t start2 = src_2->runs[rlepos2].value;
- int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
-
- while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
- if (end <= start2) {
- // output the first run
- dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
- rlepos1++;
- if (rlepos1 < src_1->n_runs) {
- start = src_1->runs[rlepos1].value;
- end = start + src_1->runs[rlepos1].length + 1;
- }
- } else if (end2 <= start) {
- // exit the second run
- rlepos2++;
- if (rlepos2 < src_2->n_runs) {
- start2 = src_2->runs[rlepos2].value;
- end2 = start2 + src_2->runs[rlepos2].length + 1;
- }
- } else {
- if (start < start2) {
- dst->runs[dst->n_runs++] =
- MAKE_RLE16(start, start2 - start - 1);
- }
- if (end2 < end) {
- start = end2;
- } else {
- rlepos1++;
- if (rlepos1 < src_1->n_runs) {
- start = src_1->runs[rlepos1].value;
- end = start + src_1->runs[rlepos1].length + 1;
- }
- }
- }
- }
- if (rlepos1 < src_1->n_runs) {
- dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
- rlepos1++;
- if (rlepos1 < src_1->n_runs) {
- memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
- sizeof(rle16_t) * (src_1->n_runs - rlepos1));
- dst->n_runs += src_1->n_runs - rlepos1;
- }
- }
-}
-
-static int run_container_to_uint32_array(void *vout, const run_container_t *cont,
- uint32_t base) {
- int outpos = 0;
- uint32_t *out = (uint32_t *)vout;
- int i = 0; for (i = 0; i < cont->n_runs; ++i) {
- uint32_t run_start = base + cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
- int j; for (j = 0; j <= le; ++j) {
- uint32_t val = run_start + j;
- memcpy(out + outpos, &val,
- sizeof(uint32_t)); // should be compiled as a MOV on x64
- outpos++;
- }
- }
- return outpos;
+int32_t bitset_container_write(const bitset_container_t *container,
+ char *buf) {
+ memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+ return bitset_container_size_in_bytes(container);
}
-/*
- * Print this container using printf (useful for debugging).
- */
-static void run_container_printf(const run_container_t *cont) {
- int i = 0; for (i = 0; i < cont->n_runs; ++i) {
- uint16_t run_start = cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
- printf("[%d,%d]", run_start, run_start + le);
- }
-}
-/*
- * Print this container using printf as a comma-separated list of 32-bit
- * integers starting at base.
- */
-static void run_container_printf_as_uint32_array(const run_container_t *cont,
- uint32_t base) {
- if (cont->n_runs == 0) return;
- {
- uint32_t run_start = base + cont->runs[0].value;
- uint16_t le = cont->runs[0].length;
- printf("%u", run_start);
- uint32_t j; for (j = 1; j <= le; ++j) printf(",%u", run_start + j);
- }
- int32_t i; for (i = 1; i < cont->n_runs; ++i) {
- uint32_t run_start = base + cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
- uint32_t j; for (j = 0; j <= le; ++j) printf(",%u", run_start + j);
- }
-}
-
-static int32_t run_container_write(const run_container_t *container, char *buf) {
- memcpy(buf, &container->n_runs, sizeof(uint16_t));
- memcpy(buf + sizeof(uint16_t), container->runs,
- container->n_runs * sizeof(rle16_t));
- return run_container_size_in_bytes(container);
+int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
+ const char *buf) {
+ container->cardinality = cardinality;
+ memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+ return bitset_container_size_in_bytes(container);
}
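
Note that the buffer holds only the raw 1024 words (8 KiB); the cardinality is not serialized, which is why bitset_container_read() takes it as a parameter. A hedged standalone sketch of the same round trip on a plain word array (the names and the driver below are illustrative, not the library's API):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define WORDS 1024  /* matches BITSET_CONTAINER_SIZE_IN_WORDS */

    static void words_write(const uint64_t *words, char *buf) {
        memcpy(buf, words, WORDS * sizeof(uint64_t));
    }

    static void words_read(uint64_t *words, const char *buf) {
        memcpy(words, buf, WORDS * sizeof(uint64_t));
    }

    int main(void) {
        static uint64_t in[WORDS], out[WORDS];
        static char buf[WORDS * sizeof(uint64_t)];
        in[0] = 0xDEADBEEFULL;
        in[WORDS - 1] = 1;
        words_write(in, buf);
        words_read(out, buf);
        puts(memcmp(in, out, sizeof in) == 0 ? "round trip ok" : "mismatch");
        return 0;
    }
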
-static int32_t run_container_read(int32_t cardinality, run_container_t *container,
- const char *buf) {
- (void)cardinality;
- memcpy(&container->n_runs, buf, sizeof(uint16_t));
- if (container->n_runs > container->capacity)
- run_container_grow(container, container->n_runs, false);
- if(container->n_runs > 0) {
- memcpy(container->runs, buf + sizeof(uint16_t),
- container->n_runs * sizeof(rle16_t));
+bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
+ for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ uint64_t w = cont->words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ if(!iterator(r + base, ptr)) return false;
+ w ^= t;
}
- return run_container_size_in_bytes(container);
+ base += 64;
+ }
+ return true;
}
-static bool run_container_iterate(const run_container_t *cont, uint32_t base,
- roaring_iterator iterator, void *ptr) {
- int i = 0; for (i = 0; i < cont->n_runs; ++i) {
- uint32_t run_start = base + cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
-
- int j; for (j = 0; j <= le; ++j)
- if (!iterator(run_start + j, ptr)) return false;
+bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
+ for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ uint64_t w = cont->words[i];
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
+ w ^= t;
}
- return true;
+ base += 64;
+ }
+ return true;
}
-static bool run_container_iterate64(const run_container_t *cont, uint32_t base,
- roaring_iterator64 iterator, uint64_t high_bits,
- void *ptr) {
- int i = 0; for (i = 0; i < cont->n_runs; ++i) {
- uint32_t run_start = base + cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
-
- int j; for (j = 0; j <= le; ++j)
- if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
- return false;
- }
- return true;
+#ifdef CROARING_IS_X64
+CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
+static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
+ const __m256i *ptr1 = (const __m256i*)container1->words;
+ const __m256i *ptr2 = (const __m256i*)container2->words;
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
+ __m256i r1 = _mm256_loadu_si256(ptr1+i);
+ __m256i r2 = _mm256_loadu_si256(ptr2+i);
+ int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
+ if ((uint32_t)mask != UINT32_MAX) {
+ return false;
+ }
+ }
+ return true;
}
+CROARING_UNTARGET_REGION
+#endif // CROARING_IS_X64
-static bool run_container_is_subset(const run_container_t *container1,
- const run_container_t *container2) {
- int i1 = 0, i2 = 0;
- while (i1 < container1->n_runs && i2 < container2->n_runs) {
- int start1 = container1->runs[i1].value;
- int stop1 = start1 + container1->runs[i1].length;
- int start2 = container2->runs[i2].value;
- int stop2 = start2 + container2->runs[i2].length;
- if (start1 < start2) {
- return false;
- } else { // start1 >= start2
- if (stop1 < stop2) {
- i1++;
- } else if (stop1 == stop2) {
- i1++;
- i2++;
- } else { // stop1 > stop2
- i2++;
- }
- }
+ALLOW_UNALIGNED
+bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
+ if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
+ if(container1->cardinality != container2->cardinality) {
+ return false;
}
- if (i1 == container1->n_runs) {
- return true;
- } else {
- return false;
+ if (container1->cardinality == INT32_C(0x10000)) {
+ return true;
}
+ }
+#ifdef CROARING_IS_X64
+ if( croaring_avx2() ) {
+ return _avx2_bitset_container_equals(container1, container2);
+ }
+#endif
+ return memcmp(container1->words,
+ container2->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0;
}
-// TODO: write smart_append_exclusive version to match the overloaded 1 param
-// Java version (or is it even used?)
-
-// follows the Java implementation closely
-// length is the rle-value. Ie, run [10,12) uses a length value 1.
-static void run_container_smart_append_exclusive(run_container_t *src,
- const uint16_t start,
- const uint16_t length) {
- int old_end;
- rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
- rle16_t *appended_last_run = src->runs + src->n_runs;
-
- if (!src->n_runs ||
- (start > (old_end = last_run->value + last_run->length + 1))) {
- *appended_last_run = MAKE_RLE16(start, length);
- src->n_runs++;
- return;
- }
- if (old_end == start) {
- // we merge
- last_run->length += (length + 1);
- return;
- }
- int new_end = start + length + 1;
-
- if (start == last_run->value) {
- // wipe out previous
- if (new_end < old_end) {
- *last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
- return;
- } else if (new_end > old_end) {
- *last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
- return;
- } else {
- src->n_runs--;
- return;
+bool bitset_container_is_subset(const bitset_container_t *container1,
+ const bitset_container_t *container2) {
+ if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
+ if(container1->cardinality > container2->cardinality) {
+ return false;
}
}
- last_run->length = start - last_run->value - 1;
- if (new_end < old_end) {
- *appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
- src->n_runs++;
- } else if (new_end > old_end) {
- *appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
- src->n_runs++;
- }
+ for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ if((container1->words[i] & container2->words[i]) != container1->words[i]) {
+ return false;
+ }
+ }
+ return true;
}
-static bool run_container_select(const run_container_t *container,
- uint32_t *start_rank, uint32_t rank,
- uint32_t *element) {
- int i = 0; for (i = 0; i < container->n_runs; i++) {
- uint16_t length = container->runs[i].length;
- if (rank <= *start_rank + length) {
- uint16_t value = container->runs[i].value;
- *element = value + rank - (*start_rank);
- return true;
- } else
- *start_rank += length + 1;
+bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) {
+ int card = bitset_container_cardinality(container);
+ if(rank >= *start_rank + card) {
+ *start_rank += card;
+ return false;
}
- return false;
-}
-
-static int run_container_rank(const run_container_t *container, uint16_t x) {
- int sum = 0;
- uint32_t x32 = x;
- int i = 0; for (i = 0; i < container->n_runs; i++) {
- uint32_t startpoint = container->runs[i].value;
- uint32_t length = container->runs[i].length;
- uint32_t endpoint = length + startpoint;
- if (x <= endpoint) {
- if (x < startpoint) break;
- return sum + (x32 - startpoint) + 1;
- } else {
- sum += length + 1;
+ const uint64_t *words = container->words;
+ int32_t size;
+ for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
+ size = hamming(words[i]);
+ if(rank <= *start_rank + size) {
+ uint64_t w = container->words[i];
+ uint16_t base = i*64;
+ while (w != 0) {
+ uint64_t t = w & (~w + 1);
+ int r = __builtin_ctzll(w);
+ if(*start_rank == rank) {
+ *element = r+base;
+ return true;
+ }
+ w ^= t;
+ *start_rank += 1;
+ }
}
+ else
+ *start_rank += size;
}
- return sum;
+ assert(false);
+ __builtin_unreachable();
}
-#ifdef CROARING_IS_X64
-CROARING_TARGET_AVX2
-/* Get the cardinality of `run'. Requires an actual computation. */
-static inline int _avx2_run_container_cardinality(const run_container_t *run) {
- const int32_t n_runs = run->n_runs;
- const rle16_t *runs = run->runs;
-
- /* by initializing with n_runs, we omit counting the +1 for each pair. */
- int sum = n_runs;
- int32_t k = 0;
- const int32_t step = sizeof(__m256i) / sizeof(rle16_t);
- if (n_runs > step) {
- __m256i total = _mm256_setzero_si256();
- for (; k + step <= n_runs; k += step) {
- __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k));
- __m256i justlengths = _mm256_srli_epi32(ymm1, 16);
- total = _mm256_add_epi32(total, justlengths);
- }
- // a store might be faster than extract?
- uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
- _mm256_storeu_si256((__m256i *)buffer, total);
- sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
- (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
- }
- for (; k < n_runs; ++k) {
- sum += runs[k].length;
+/* Returns the smallest value (assumes not empty) */
+uint16_t bitset_container_minimum(const bitset_container_t *container) {
+ for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ uint64_t w = container->words[i];
+ if (w != 0) {
+ int r = __builtin_ctzll(w);
+ return r + i * 64;
}
-
- return sum;
+ }
+ return UINT16_MAX;
}
-CROARING_UNTARGET_REGION
-
-/* Get the cardinality of `run'. Requires an actual computation. */
-static inline int _scalar_run_container_cardinality(const run_container_t *run) {
- const int32_t n_runs = run->n_runs;
- const rle16_t *runs = run->runs;
-
- /* by initializing with n_runs, we omit counting the +1 for each pair. */
- int sum = n_runs;
- for (int k = 0; k < n_runs; ++k) {
- sum += runs[k].length;
+/* Returns the largest value (assumes not empty) */
+uint16_t bitset_container_maximum(const bitset_container_t *container) {
+  for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i >= 0; --i ) { /* include word 0: the maximum may be below 64 */
+ uint64_t w = container->words[i];
+ if (w != 0) {
+ int r = __builtin_clzll(w);
+ return i * 64 + 63 - r;
}
-
- return sum;
+ }
+ return 0;
}
-static int run_container_cardinality(const run_container_t *run) {
- if( croaring_avx2() ) {
- return _avx2_run_container_cardinality(run);
- } else {
- return _scalar_run_container_cardinality(run);
+/* Returns the number of values equal or smaller than x */
+int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
+ // credit: aqrit
+ int sum = 0;
+ int i = 0;
+ for (int end = x / 64; i < end; i++){
+ sum += hamming(container->words[i]);
}
+ uint64_t lastword = container->words[i];
+ uint64_t lastpos = UINT64_C(1) << (x % 64);
+ uint64_t mask = lastpos + lastpos - 1; // smear right
+ sum += hamming(lastword & mask);
+ return sum;
}
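
rank(x) is computed as full popcounts of the words below x/64 plus a masked popcount of the word containing x, where lastpos + lastpos - 1 smears the bit at position x%64 down to bit 0. A small standalone check of the same arithmetic, again assuming __builtin_popcountll:

    #include <stdint.h>
    #include <stdio.h>

    #define WORDS 4  /* enough for values 0..255 in this sketch */

    /* Number of set values that are <= x. */
    static int rank_leq(const uint64_t *words, uint16_t x) {
        int sum = 0, i = 0;
        for (int end = x / 64; i < end; i++)
            sum += __builtin_popcountll(words[i]);
        uint64_t lastpos = UINT64_C(1) << (x % 64);
        uint64_t mask = lastpos + lastpos - 1;  /* keeps bits 0 .. x%64 */
        return sum + __builtin_popcountll(words[i] & mask);
    }

    int main(void) {
        uint64_t words[WORDS] = { 0 };
        words[0] = (UINT64_C(1) << 3) | (UINT64_C(1) << 40);  /* values 3 and 40 */
        words[1] = UINT64_C(1) << 5;                          /* value 69 */
        printf("%d %d %d\n", rank_leq(words, 2), rank_leq(words, 40), rank_leq(words, 100));
        /* expected: 0 2 3 */
        return 0;
    }
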
-#else
-
-/* Get the cardinality of `run'. Requires an actual computation. */
-static int run_container_cardinality(const run_container_t *run) {
- const int32_t n_runs = run->n_runs;
- const rle16_t *runs = run->runs;
- /* by initializing with n_runs, we omit counting the +1 for each pair. */
- int sum = n_runs;
- int k; for (k = 0; k < n_runs; ++k) {
- sum += runs[k].length;
- }
-
- return sum;
+/* Returns the index of the first value equal or larger than x, or -1 */
+int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
+ uint32_t x32 = x;
+ uint32_t k = x32 / 64;
+ uint64_t word = container->words[k];
+ const int diff = x32 - k * 64; // in [0,64)
+ word = (word >> diff) << diff; // a mask is faster, but we don't care
+ while(word == 0) {
+ k++;
+ if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
+ word = container->words[k];
+ }
+ return k * 64 + __builtin_ctzll(word);
}
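
The lookup clears the bits below x with a shift pair and then scans forward for the next non-empty word. A standalone sketch of the same idea over a small, illustrative word array:

    #include <stdint.h>
    #include <stdio.h>

    #define WORDS 4

    /* Index of the first set bit >= x, or -1 if there is none. */
    static int first_geq(const uint64_t *words, uint16_t x) {
        uint32_t k = x / 64;
        int diff = x % 64;
        uint64_t word = (words[k] >> diff) << diff;  /* drop the bits below x */
        while (word == 0) {
            if (++k == WORDS) return -1;
            word = words[k];
        }
        return (int)(k * 64 + __builtin_ctzll(word));
    }

    int main(void) {
        uint64_t words[WORDS] = { 0 };
        words[1] = UINT64_C(1) << 10;  /* value 74 */
        printf("%d %d\n", first_geq(words, 3), first_geq(words, 80));  /* 74 -1 */
        return 0;
    }
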
-#endif
-
#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
#endif
-/* end file src/containers/run.c */
+/* end file src/containers/bitset.c */
/* begin file src/containers/containers.c */
@@ -15195,7 +11483,7 @@ extern inline container_t *container_iandnot(
const container_t *c2, uint8_t type2,
uint8_t *result_type);
-static void container_free(container_t *c, uint8_t type) {
+void container_free(container_t *c, uint8_t type) {
switch (type) {
case BITSET_CONTAINER_TYPE:
bitset_container_free(CAST_bitset(c));
@@ -15215,7 +11503,7 @@ static void container_free(container_t *c, uint8_t type) {
}
}
-static void container_printf(const container_t *c, uint8_t type) {
+void container_printf(const container_t *c, uint8_t type) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -15232,7 +11520,7 @@ static void container_printf(const container_t *c, uint8_t type) {
}
}
-static void container_printf_as_uint32_array(
+void container_printf_as_uint32_array(
const container_t *c, uint8_t typecode,
uint32_t base
){
@@ -15289,7 +11577,7 @@ extern inline container_t *container_xor(
const container_t *c2, uint8_t type2,
uint8_t *result_type);
-static container_t *get_copy_of_container(
+container_t *get_copy_of_container(
container_t *c, uint8_t *typecode,
bool copy_on_write
){
@@ -15302,7 +11590,7 @@ static container_t *get_copy_of_container(
}
assert(*typecode != SHARED_CONTAINER_TYPE);
- if ((shared_container = (shared_container_t *)ndpi_malloc(
+ if ((shared_container = (shared_container_t *)roaring_malloc(
sizeof(shared_container_t))) == NULL) {
return NULL;
}
@@ -15325,7 +11613,7 @@ static container_t *get_copy_of_container(
* Copies a container, requires a typecode. This allocates new memory, caller
* is responsible for deallocation.
*/
-static container_t *container_clone(const container_t *c, uint8_t typecode) {
+container_t *container_clone(const container_t *c, uint8_t typecode) {
// We do not want to allow cloning of shared containers.
// c = container_unwrap_shared(c, &typecode);
switch (typecode) {
@@ -15345,7 +11633,7 @@ static container_t *container_clone(const container_t *c, uint8_t typecode) {
}
}
-static container_t *shared_container_extract_copy(
+container_t *shared_container_extract_copy(
shared_container_t *sc, uint8_t *typecode
){
assert(sc->counter > 0);
@@ -15356,7 +11644,7 @@ static container_t *shared_container_extract_copy(
if (sc->counter == 0) {
answer = sc->container;
sc->container = NULL; // paranoid
- ndpi_free(sc);
+ roaring_free(sc);
} else {
answer = container_clone(sc->container, *typecode);
}
@@ -15364,14 +11652,14 @@ static container_t *shared_container_extract_copy(
return answer;
}
-static void shared_container_free(shared_container_t *container) {
+void shared_container_free(shared_container_t *container) {
assert(container->counter > 0);
container->counter--;
if (container->counter == 0) {
assert(container->typecode != SHARED_CONTAINER_TYPE);
container_free(container->container, container->typecode);
container->container = NULL; // paranoid
- ndpi_free(container);
+ roaring_free(container);
}
}
@@ -15417,6 +11705,340 @@ extern inline container_t *container_andnot(
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/containers.c */
+/* begin file src/containers/convert.c */
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// file contains grubby stuff that must know impl. details of all container
+// types.
+bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
+ bitset_container_t *ans = bitset_container_create();
+ int limit = array_container_cardinality(ac);
+ for (int i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
+ return ans;
+}
+
+bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
+ int card = run_container_cardinality(arr);
+ bitset_container_t *answer = bitset_container_create();
+ for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+ rle16_t vl = arr->runs[rlepos];
+ bitset_set_lenrange(answer->words, vl.value, vl.length);
+ }
+ answer->cardinality = card;
+ return answer;
+}
+
+array_container_t *array_container_from_run(const run_container_t *arr) {
+ array_container_t *answer =
+ array_container_create_given_capacity(run_container_cardinality(arr));
+ answer->cardinality = 0;
+ for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+ int run_start = arr->runs[rlepos].value;
+ int run_end = run_start + arr->runs[rlepos].length;
+
+ for (int run_value = run_start; run_value <= run_end; ++run_value) {
+ answer->array[answer->cardinality++] = (uint16_t)run_value;
+ }
+ }
+ return answer;
+}
+
+array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
+ array_container_t *result =
+ array_container_create_given_capacity(bits->cardinality);
+ result->cardinality = bits->cardinality;
+ // sse version ends up being slower here
+ // (bitset_extract_setbits_sse_uint16)
+ // because of the sparsity of the data
+ bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+ result->array, 0);
+ return result;
+}
+
+/* assumes that container has adequate space. Run from [s,e] (inclusive) */
+static void add_run(run_container_t *rc, int s, int e) {
+ rc->runs[rc->n_runs].value = s;
+ rc->runs[rc->n_runs].length = e - s;
+ rc->n_runs++;
+}
+
+run_container_t *run_container_from_array(const array_container_t *c) {
+ int32_t n_runs = array_container_number_of_runs(c);
+ run_container_t *answer = run_container_create_given_capacity(n_runs);
+ int prev = -2;
+ int run_start = -1;
+ int32_t card = c->cardinality;
+ if (card == 0) return answer;
+ for (int i = 0; i < card; ++i) {
+ const uint16_t cur_val = c->array[i];
+ if (cur_val != prev + 1) {
+ // new run starts; flush old one, if any
+ if (run_start != -1) add_run(answer, run_start, prev);
+ run_start = cur_val;
+ }
+ prev = c->array[i];
+ }
+ // now prev is the last seen value
+ add_run(answer, run_start, prev);
+ // assert(run_container_cardinality(answer) == c->cardinality);
+ return answer;
+}
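
The conversion scans the sorted array once and starts a new run whenever a value is not prev + 1; each run is stored as a start value plus an inclusive length, as in the rle16_t convention. A minimal standalone version of that pass:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const uint16_t vals[] = { 3, 4, 5, 9, 20, 21 };
        const int n = sizeof vals / sizeof vals[0];
        int prev = -2, run_start = -1;
        for (int i = 0; i < n; ++i) {
            if (vals[i] != prev + 1) {
                if (run_start != -1)  /* flush the previous run */
                    printf("run [%d,%d] -> value=%d length=%d\n",
                           run_start, prev, run_start, prev - run_start);
                run_start = vals[i];
            }
            prev = vals[i];
        }
        if (run_start != -1)  /* flush the last run */
            printf("run [%d,%d] -> value=%d length=%d\n",
                   run_start, prev, run_start, prev - run_start);
        /* expected: [3,5] length 2, [9,9] length 0, [20,21] length 1 */
        return 0;
    }
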
+
+/**
+ * Convert the run container to either a bitset or an array container,
+ * depending on the cardinality. Allocates and returns a new container,
+ * which the caller is responsible for freeing; the original run container
+ * is not freed.
+ */
+container_t *convert_to_bitset_or_array_container(
+ run_container_t *rc, int32_t card,
+ uint8_t *resulttype
+){
+ if (card <= DEFAULT_MAX_SIZE) {
+ array_container_t *answer = array_container_create_given_capacity(card);
+ answer->cardinality = 0;
+ for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ uint16_t run_start = rc->runs[rlepos].value;
+ uint16_t run_end = run_start + rc->runs[rlepos].length;
+ for (uint16_t run_value = run_start; run_value < run_end;
+ ++run_value) {
+ answer->array[answer->cardinality++] = run_value;
+ }
+ answer->array[answer->cardinality++] = run_end;
+ }
+ assert(card == answer->cardinality);
+ *resulttype = ARRAY_CONTAINER_TYPE;
+ //run_container_free(r);
+ return answer;
+ }
+ bitset_container_t *answer = bitset_container_create();
+ for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ uint16_t run_start = rc->runs[rlepos].value;
+ bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
+ }
+ answer->cardinality = card;
+ *resulttype = BITSET_CONTAINER_TYPE;
+ //run_container_free(r);
+ return answer;
+}
+
+/* Converts a run container to either an array or a bitset, IF it saves space.
+ * If a conversion occurs, the caller remains responsible for freeing the
+ * original container and becomes responsible for freeing the new one. */
+container_t *convert_run_to_efficient_container(
+ run_container_t *c,
+ uint8_t *typecode_after
+){
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(c->n_runs);
+
+ int32_t size_as_bitset_container =
+ bitset_container_serialized_size_in_bytes();
+ int32_t card = run_container_cardinality(c);
+ int32_t size_as_array_container =
+ array_container_serialized_size_in_bytes(card);
+
+ int32_t min_size_non_run =
+ size_as_bitset_container < size_as_array_container
+ ? size_as_bitset_container
+ : size_as_array_container;
+ if (size_as_run_container <= min_size_non_run) { // no conversion
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return c;
+ }
+ if (card <= DEFAULT_MAX_SIZE) {
+ // to array
+ array_container_t *answer = array_container_create_given_capacity(card);
+ answer->cardinality = 0;
+ for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+ int run_start = c->runs[rlepos].value;
+ int run_end = run_start + c->runs[rlepos].length;
+
+ for (int run_value = run_start; run_value <= run_end; ++run_value) {
+ answer->array[answer->cardinality++] = (uint16_t)run_value;
+ }
+ }
+ *typecode_after = ARRAY_CONTAINER_TYPE;
+ return answer;
+ }
+
+ // else to bitset
+ bitset_container_t *answer = bitset_container_create();
+
+ for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+ int start = c->runs[rlepos].value;
+ int end = start + c->runs[rlepos].length;
+ bitset_set_range(answer->words, start, end + 1);
+ }
+ answer->cardinality = card;
+ *typecode_after = BITSET_CONTAINER_TYPE;
+ return answer;
+}
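
The choice between keeping the run container and converting hinges on comparing serialized sizes. A rough worked example with approximate sizes, ignoring the small per-container headers; the library uses the *_serialized_size_in_bytes() helpers for the exact values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        int32_t n_runs = 1000, card = 10000;
        int32_t run_bytes    = 4 * n_runs;  /* one rle16_t (value,length) per run */
        int32_t array_bytes  = 2 * card;    /* one uint16_t per element */
        int32_t bitset_bytes = 8192;        /* 1024 words of 8 bytes, fixed */
        int32_t min_non_run  = array_bytes < bitset_bytes ? array_bytes : bitset_bytes;
        puts(run_bytes <= min_non_run ? "keep the run container"
                                      : "convert to an array or bitset container");
        return 0;
    }
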
+
+// like convert_run_to_efficient_container but frees the old result if needed
+container_t *convert_run_to_efficient_container_and_free(
+ run_container_t *c,
+ uint8_t *typecode_after
+){
+ container_t *answer = convert_run_to_efficient_container(c, typecode_after);
+ if (answer != c) run_container_free(c);
+ return answer;
+}
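+/*
+ * Illustrative usage sketch (hypothetical caller code): given a
+ * run_container_t *rc built by the caller, the helper above picks the cheapest
+ * representation and frees rc only when a conversion actually happened:
+ *
+ *   uint8_t type;
+ *   container_t *c = convert_run_to_efficient_container_and_free(rc, &type);
+ *   // type is RUN_CONTAINER_TYPE (rc returned as-is), ARRAY_CONTAINER_TYPE
+ *   // or BITSET_CONTAINER_TYPE (in the latter two cases rc has been freed).
+ */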
+
+/* once converted, the original container is disposed here, rather than
+ in roaring_array
+*/
+
+// TODO: split into run- array- and bitset- subfunctions for sanity;
+// a few function calls won't really matter.
+
+container_t *convert_run_optimize(
+ container_t *c, uint8_t typecode_original,
+ uint8_t *typecode_after
+){
+ if (typecode_original == RUN_CONTAINER_TYPE) {
+ container_t *newc = convert_run_to_efficient_container(
+ CAST_run(c), typecode_after);
+ if (newc != c) {
+ container_free(c, typecode_original);
+ }
+ return newc;
+ } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
+ // it might need to be converted to a run container.
+ array_container_t *c_qua_array = CAST_array(c);
+ int32_t n_runs = array_container_number_of_runs(c_qua_array);
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(n_runs);
+ int32_t card = array_container_cardinality(c_qua_array);
+ int32_t size_as_array_container =
+ array_container_serialized_size_in_bytes(card);
+
+ if (size_as_run_container >= size_as_array_container) {
+ *typecode_after = ARRAY_CONTAINER_TYPE;
+ return c;
+ }
+ // else convert array to run container
+ run_container_t *answer = run_container_create_given_capacity(n_runs);
+ int prev = -2;
+ int run_start = -1;
+
+ assert(card > 0);
+ for (int i = 0; i < card; ++i) {
+ uint16_t cur_val = c_qua_array->array[i];
+ if (cur_val != prev + 1) {
+ // new run starts; flush old one, if any
+ if (run_start != -1) add_run(answer, run_start, prev);
+ run_start = cur_val;
+ }
+ prev = c_qua_array->array[i];
+ }
+ assert(run_start >= 0);
+ // now prev is the last seen value
+ add_run(answer, run_start, prev);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ array_container_free(c_qua_array);
+ return answer;
+ } else if (typecode_original ==
+ BITSET_CONTAINER_TYPE) { // run conversions on bitset
+ // does bitset need conversion to run?
+ bitset_container_t *c_qua_bitset = CAST_bitset(c);
+ int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(n_runs);
+ int32_t size_as_bitset_container =
+ bitset_container_serialized_size_in_bytes();
+
+ if (size_as_bitset_container <= size_as_run_container) {
+ // no conversion needed.
+ *typecode_after = BITSET_CONTAINER_TYPE;
+ return c;
+ }
+ // bitset to runcontainer (ported from Java RunContainer(
+ // BitmapContainer bc, int nbrRuns))
+ assert(n_runs > 0); // no empty bitmaps
+ run_container_t *answer = run_container_create_given_capacity(n_runs);
+
+ int long_ctr = 0;
+ uint64_t cur_word = c_qua_bitset->words[0];
+ while (true) {
+ while (cur_word == UINT64_C(0) &&
+ long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
+ cur_word = c_qua_bitset->words[++long_ctr];
+
+ if (cur_word == UINT64_C(0)) {
+ bitset_container_free(c_qua_bitset);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return answer;
+ }
+
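+            /* Illustrative walk-through: if cur_word == 0x38 (bits 3..5 set),
+             * __builtin_ctzll(cur_word) = 3 marks the run start;
+             * cur_word | (cur_word - 1) = 0x3F fills the bits below the run,
+             * __builtin_ctzll(~0x3F) = 6 is the first clear bit, so the run
+             * [3,5] is emitted; cur_word_with_1s & (cur_word_with_1s + 1)
+             * then clears that run and keeps the remaining bits. */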
+ int local_run_start = __builtin_ctzll(cur_word);
+ int run_start = local_run_start + 64 * long_ctr;
+ uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
+
+ int run_end = 0;
+ while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
+ long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
+ cur_word_with_1s = c_qua_bitset->words[++long_ctr];
+
+ if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
+                run_end = 64 + long_ctr * 64;  // exclusive upper bound
+ add_run(answer, run_start, run_end - 1);
+ bitset_container_free(c_qua_bitset);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return answer;
+ }
+ int local_run_end = __builtin_ctzll(~cur_word_with_1s);
+ run_end = local_run_end + long_ctr * 64;
+ add_run(answer, run_start, run_end - 1);
+ cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
+ }
+ return answer;
+ } else {
+ assert(false);
+ __builtin_unreachable();
+ return NULL;
+ }
+}
+
+container_t *container_from_run_range(
+ const run_container_t *run,
+ uint32_t min, uint32_t max, uint8_t *typecode_after
+){
+ // We expect most of the time to end up with a bitset container
+ bitset_container_t *bitset = bitset_container_create();
+ *typecode_after = BITSET_CONTAINER_TYPE;
+ int32_t union_cardinality = 0;
+ for (int32_t i = 0; i < run->n_runs; ++i) {
+ uint32_t rle_min = run->runs[i].value;
+ uint32_t rle_max = rle_min + run->runs[i].length;
+ bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min);
+ union_cardinality += run->runs[i].length + 1;
+ }
+ union_cardinality += max - min + 1;
+ union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min);
+ bitset_set_lenrange(bitset->words, min, max - min);
+ bitset->cardinality = union_cardinality;
+ if(bitset->cardinality <= DEFAULT_MAX_SIZE) {
+ // we need to convert to an array container
+ array_container_t * array = array_container_from_bitset(bitset);
+ *typecode_after = ARRAY_CONTAINER_TYPE;
+ bitset_container_free(bitset);
+ return array;
+ }
+ return bitset;
+}
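+/*
+ * Worked example (illustrative): with runs {[2,4], [10,10]} and min=3, max=12,
+ * the runs contribute 3+1 values, the range contributes 12-3+1 = 10, and the
+ * overlap {3,4,10} of size 3 is subtracted, giving a cardinality of 11 for the
+ * union {2,...,12}.
+ */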
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/convert.c */
/* begin file src/containers/mixed_andnot.c */
/*
* mixed_andnot.c. More methods since operation is not symmetric,
@@ -15433,7 +12055,7 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the andnot of src_1 and src_2 and write the result to
 * dst, a valid array container that could be the same as src_1. */
-static void array_bitset_container_andnot(const array_container_t *src_1,
+void array_bitset_container_andnot(const array_container_t *src_1,
const bitset_container_t *src_2,
array_container_t *dst) {
// follows Java implementation as of June 2016
@@ -15442,7 +12064,7 @@ static void array_bitset_container_andnot(const array_container_t *src_1,
}
int32_t newcard = 0;
const int32_t origcard = src_1->cardinality;
- int i = 0; for (i = 0; i < origcard; ++i) {
+ for (int i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
dst->array[newcard] = key;
newcard += 1 - bitset_container_contains(src_2, key);
@@ -15453,7 +12075,7 @@ static void array_bitset_container_andnot(const array_container_t *src_1,
/* Compute the andnot of src_1 and src_2 and write the result to
* src_1 */
-static void array_bitset_container_iandnot(array_container_t *src_1,
+void array_bitset_container_iandnot(array_container_t *src_1,
const bitset_container_t *src_2) {
array_bitset_container_andnot(src_1, src_2, src_1);
}
@@ -15463,7 +12085,7 @@ static void array_bitset_container_iandnot(array_container_t *src_1,
* Return true for a bitset result; false for array
*/
-static bool bitset_array_container_andnot(
+bool bitset_array_container_andnot(
const bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -15491,7 +12113,7 @@ static bool bitset_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_array_container_iandnot(
+bool bitset_array_container_iandnot(
bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -15515,7 +12137,7 @@ static bool bitset_array_container_iandnot(
* result true) or an array container.
*/
-static bool run_bitset_container_andnot(
+bool run_bitset_container_andnot(
const run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -15525,9 +12147,9 @@ static bool run_bitset_container_andnot(
// must be an array
array_container_t *answer = array_container_create_given_capacity(card);
answer->cardinality = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
- int run_value; for (run_value = rle.value; run_value <= rle.value + rle.length;
+ for (int run_value = rle.value; run_value <= rle.value + rle.length;
++run_value) {
if (!bitset_container_get(src_2, (uint16_t)run_value)) {
answer->array[answer->cardinality++] = (uint16_t)run_value;
@@ -15541,7 +12163,7 @@ static bool run_bitset_container_andnot(
bitset_container_t *answer = bitset_container_clone(src_2);
uint32_t last_pos = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
uint32_t start = rle.value;
@@ -15571,7 +12193,7 @@ static bool run_bitset_container_andnot(
* result true) or an array container.
*/
-static bool run_bitset_container_iandnot(
+bool run_bitset_container_iandnot(
run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -15588,7 +12210,7 @@ static bool run_bitset_container_iandnot(
* result true) or an array container.
*/
-static bool bitset_run_container_andnot(
+bool bitset_run_container_andnot(
const bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -15596,7 +12218,7 @@ static bool bitset_run_container_andnot(
bitset_container_t *result = bitset_container_create();
bitset_container_copy(src_1, result);
- int32_t rlepos; for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
rle16_t rle = src_2->runs[rlepos];
bitset_reset_range(result->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -15619,13 +12241,13 @@ static bool bitset_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_run_container_iandnot(
+bool bitset_run_container_iandnot(
bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
*dst = src_1;
- int32_t rlepos; for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
rle16_t rle = src_2->runs[rlepos];
bitset_reset_range(src_1->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -15655,7 +12277,7 @@ static int run_array_array_subtract(const run_container_t *rc,
int32_t in_array_pos =
-1; // since advanceUntil always assumes we start the search AFTER this
- int rlepos; for (rlepos = 0; rlepos < rc->n_runs; rlepos++) {
+ for (int rlepos = 0; rlepos < rc->n_runs; rlepos++) {
int32_t start = rc->runs[rlepos].value;
int32_t end = start + rc->runs[rlepos].length + 1;
@@ -15663,17 +12285,17 @@ static int run_array_array_subtract(const run_container_t *rc,
a_in->cardinality, (uint16_t)start);
if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
- int32_t i; for (i = start; i < end; ++i)
+ for (int32_t i = start; i < end; ++i)
a_out->array[out_card++] = (uint16_t)i;
} else {
uint16_t next_nonincluded = a_in->array[in_array_pos];
if (next_nonincluded >= end) {
// another case when run goes unaltered
- int32_t i; for (i = start; i < end; ++i)
+ for (int32_t i = start; i < end; ++i)
a_out->array[out_card++] = (uint16_t)i;
in_array_pos--; // ensure we see this item again if necessary
} else {
- int32_t i; for (i = start; i < end; ++i)
+ for (int32_t i = start; i < end; ++i)
if (i != next_nonincluded)
a_out->array[out_card++] = (uint16_t)i;
else // 0 should ensure we don't match
@@ -15692,7 +12314,7 @@ static int run_array_array_subtract(const run_container_t *rc,
* can become any type of container.
*/
-static int run_array_container_andnot(
+int run_array_container_andnot(
const run_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -15786,7 +12408,7 @@ static int run_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static int run_array_container_iandnot(
+int run_array_container_iandnot(
run_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -15798,7 +12420,7 @@ static int run_array_container_iandnot(
/* dst must be a valid array container, allowed to be src_1 */
-static void array_run_container_andnot(const array_container_t *src_1,
+void array_run_container_andnot(const array_container_t *src_1,
const run_container_t *src_2,
array_container_t *dst) {
// basically following Java impl as of June 2016
@@ -15818,7 +12440,7 @@ static void array_run_container_andnot(const array_container_t *src_1,
uint16_t val = 0;
int dest_card = 0;
- int i = 0; for (i = 0; i < src_1->cardinality; ++i) {
+ for (int i = 0; i < src_1->cardinality; ++i) {
val = src_1->array[i];
if (val < run_start)
dst->array[dest_card++] = val;
@@ -15844,7 +12466,7 @@ static void array_run_container_andnot(const array_container_t *src_1,
* can become any kind of container.
*/
-static void array_run_container_iandnot(array_container_t *src_1,
+void array_run_container_iandnot(array_container_t *src_1,
const run_container_t *src_2) {
array_run_container_andnot(src_1, src_2, src_1);
}
@@ -15853,7 +12475,7 @@ static void array_run_container_iandnot(array_container_t *src_1,
* can become any kind of container.
*/
-static int run_run_container_andnot(
+int run_run_container_andnot(
const run_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -15871,7 +12493,7 @@ static int run_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static int run_run_container_iandnot(
+int run_run_container_iandnot(
run_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -15885,7 +12507,7 @@ static int run_run_container_iandnot(
* dst is a valid array container and may be the same as src_1
*/
-static void array_array_container_andnot(const array_container_t *src_1,
+void array_array_container_andnot(const array_container_t *src_1,
const array_container_t *src_2,
array_container_t *dst) {
array_container_andnot(src_1, src_2, dst);
@@ -15893,7 +12515,7 @@ static void array_array_container_andnot(const array_container_t *src_1,
/* inplace array-array andnot will always be able to reuse the space of
* src_1 */
-static void array_array_container_iandnot(array_container_t *src_1,
+void array_array_container_iandnot(array_container_t *src_1,
const array_container_t *src_2) {
array_container_andnot(src_1, src_2, src_1);
}
@@ -15903,7 +12525,7 @@ static void array_array_container_iandnot(array_container_t *src_1,
* "dst is a bitset"
*/
-static bool bitset_bitset_container_andnot(
+bool bitset_bitset_container_andnot(
const bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -15926,7 +12548,7 @@ static bool bitset_bitset_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_bitset_container_iandnot(
+bool bitset_bitset_container_iandnot(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -15945,370 +12567,13 @@ static bool bitset_bitset_container_iandnot(
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_andnot.c */
-/* begin file src/containers/mixed_negation.c */
-/*
- * mixed_negation.c
- *
- */
-
-#include <assert.h>
-#include <string.h>
-
-
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
-#endif
-
-// TODO: make simplified and optimized negation code across
-// the full range.
-
-/* Negation across the entire range of the container.
- * Compute the negation of src and write the result
- * to *dst. The complement of a
- * sufficiently sparse set will always be dense and a hence a bitmap
-' * We assume that dst is pre-allocated and a valid bitset container
- * There can be no in-place version.
- */
-static void array_container_negation(const array_container_t *src,
- bitset_container_t *dst) {
- uint64_t card = UINT64_C(1 << 16);
- bitset_container_set_all(dst);
-
- if (src->cardinality == 0) {
- return;
- }
-
- dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array,
- (uint64_t)src->cardinality);
-}
-
-/* Negation across the entire range of the container
- * Compute the negation of src and write the result
- * to *dst. A true return value indicates a bitset result,
- * otherwise the result is an array container.
- * We assume that dst is not pre-allocated. In
- * case of failure, *dst will be NULL.
- */
-static bool bitset_container_negation(
- const bitset_container_t *src, container_t **dst
-){
- return bitset_container_negation_range(src, 0, (1 << 16), dst);
-}
-
-/* inplace version */
-/*
- * Same as bitset_container_negation except that if the output is to
- * be a
- * bitset_container_t, then src is modified and no allocation is made.
- * If the output is to be an array_container_t, then caller is responsible
- * to free the container.
- * In all cases, the result is in *dst.
- */
-static bool bitset_container_negation_inplace(
- bitset_container_t *src, container_t **dst
-){
- return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
-}
-
-/* Negation across the entire range of container
- * Compute the negation of src and write the result
- * to *dst. Return values are the *_TYPECODES as defined * in containers.h
- * We assume that dst is not pre-allocated. In
- * case of failure, *dst will be NULL.
- */
-static int run_container_negation(const run_container_t *src, container_t **dst) {
- return run_container_negation_range(src, 0, (1 << 16), dst);
-}
-
-/*
- * Same as run_container_negation except that if the output is to
- * be a
- * run_container_t, and has the capacity to hold the result,
- * then src is modified and no allocation is made.
- * In all cases, the result is in *dst.
- */
-static int run_container_negation_inplace(run_container_t *src, container_t **dst) {
- return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
-}
-
-/* Negation across a range of the container.
- * Compute the negation of src and write the result
- * to *dst. Returns true if the result is a bitset container
- * and false for an array container. *dst is not preallocated.
- */
-static bool array_container_negation_range(
- const array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- /* close port of the Java implementation */
- if (range_start >= range_end) {
- *dst = array_container_clone(src);
- return false;
- }
-
- int32_t start_index =
- binarySearch(src->array, src->cardinality, (uint16_t)range_start);
- if (start_index < 0) start_index = -start_index - 1;
-
- int32_t last_index =
- binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
- if (last_index < 0) last_index = -last_index - 2;
-
- const int32_t current_values_in_range = last_index - start_index + 1;
- const int32_t span_to_be_flipped = range_end - range_start;
- const int32_t new_values_in_range =
- span_to_be_flipped - current_values_in_range;
- const int32_t cardinality_change =
- new_values_in_range - current_values_in_range;
- const int32_t new_cardinality = src->cardinality + cardinality_change;
-
- if (new_cardinality > DEFAULT_MAX_SIZE) {
- bitset_container_t *temp = bitset_container_from_array(src);
- bitset_flip_range(temp->words, (uint32_t)range_start,
- (uint32_t)range_end);
- temp->cardinality = new_cardinality;
- *dst = temp;
- return true;
- }
-
- array_container_t *arr =
- array_container_create_given_capacity(new_cardinality);
- *dst = (container_t *)arr;
- if(new_cardinality == 0) {
- arr->cardinality = new_cardinality;
- return false; // we are done.
- }
- // copy stuff before the active area
- memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
-
- // work on the range
- int32_t out_pos = start_index, in_pos = start_index;
- int32_t val_in_range = range_start;
- for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
- if ((uint16_t)val_in_range != src->array[in_pos]) {
- arr->array[out_pos++] = (uint16_t)val_in_range;
- } else {
- ++in_pos;
- }
- }
- for (; val_in_range < range_end; ++val_in_range)
- arr->array[out_pos++] = (uint16_t)val_in_range;
-
- // content after the active range
- memcpy(arr->array + out_pos, src->array + (last_index + 1),
- (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
- arr->cardinality = new_cardinality;
- return false;
-}
-
-/* Even when the result would fit, it is unclear how to make an
- * inplace version without inefficient copying.
- */
-
-static bool array_container_negation_range_inplace(
- array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- bool ans = array_container_negation_range(src, range_start, range_end, dst);
- // TODO : try a real inplace version
- array_container_free(src);
- return ans;
-}
-
-/* Negation across a range of the container
- * Compute the negation of src and write the result
- * to *dst. A true return value indicates a bitset result,
- * otherwise the result is an array container.
- * We assume that dst is not pre-allocated. In
- * case of failure, *dst will be NULL.
- */
-static bool bitset_container_negation_range(
- const bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- // TODO maybe consider density-based estimate
- // and sometimes build result directly as array, with
- // conversion back to bitset if wrong. Or determine
- // actual result cardinality, then go directly for the known final cont.
-
- // keep computation using bitsets as long as possible.
- bitset_container_t *t = bitset_container_clone(src);
- bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end);
- t->cardinality = bitset_container_compute_cardinality(t);
-
- if (t->cardinality > DEFAULT_MAX_SIZE) {
- *dst = t;
- return true;
- } else {
- *dst = array_container_from_bitset(t);
- bitset_container_free(t);
- return false;
- }
-}
-
-/* inplace version */
-/*
- * Same as bitset_container_negation except that if the output is to
- * be a
- * bitset_container_t, then src is modified and no allocation is made.
- * If the output is to be an array_container_t, then caller is responsible
- * to free the container.
- * In all cases, the result is in *dst.
- */
-static bool bitset_container_negation_range_inplace(
- bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
- src->cardinality = bitset_container_compute_cardinality(src);
- if (src->cardinality > DEFAULT_MAX_SIZE) {
- *dst = src;
- return true;
- }
- *dst = array_container_from_bitset(src);
- bitset_container_free(src);
- return false;
-}
-
-/* Negation across a range of container
- * Compute the negation of src and write the result
- * to *dst. Return values are the *_TYPECODES as defined * in containers.h
- * We assume that dst is not pre-allocated. In
- * case of failure, *dst will be NULL.
- */
-static int run_container_negation_range(
- const run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- uint8_t return_typecode;
-
- // follows the Java implementation
- if (range_end <= range_start) {
- *dst = run_container_clone(src);
- return RUN_CONTAINER_TYPE;
- }
-
- run_container_t *ans = run_container_create_given_capacity(
- src->n_runs + 1); // src->n_runs + 1);
- int k = 0;
- for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
- ans->runs[k] = src->runs[k];
- ans->n_runs++;
- }
-
- run_container_smart_append_exclusive(
- ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
-
- for (; k < src->n_runs; ++k) {
- run_container_smart_append_exclusive(ans, src->runs[k].value,
- src->runs[k].length);
- }
-
- *dst = convert_run_to_efficient_container(ans, &return_typecode);
- if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
-
- return return_typecode;
-}
-
-/*
- * Same as run_container_negation except that if the output is to
- * be a
- * run_container_t, and has the capacity to hold the result,
- * then src is modified and no allocation is made.
- * In all cases, the result is in *dst.
- */
-static int run_container_negation_range_inplace(
- run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
- uint8_t return_typecode;
-
- if (range_end <= range_start) {
- *dst = src;
- return RUN_CONTAINER_TYPE;
- }
-
- // TODO: efficient special case when range is 0 to 65535 inclusive
-
- if (src->capacity == src->n_runs) {
- // no excess room. More checking to see if result can fit
- bool last_val_before_range = false;
- bool first_val_in_range = false;
- bool last_val_in_range = false;
- bool first_val_past_range = false;
-
- if (range_start > 0)
- last_val_before_range =
- run_container_contains(src, (uint16_t)(range_start - 1));
- first_val_in_range = run_container_contains(src, (uint16_t)range_start);
-
- if (last_val_before_range == first_val_in_range) {
- last_val_in_range =
- run_container_contains(src, (uint16_t)(range_end - 1));
- if (range_end != 0x10000)
- first_val_past_range =
- run_container_contains(src, (uint16_t)range_end);
-
- if (last_val_in_range ==
- first_val_past_range) { // no space for inplace
- int ans = run_container_negation_range(src, range_start,
- range_end, dst);
- run_container_free(src);
- return ans;
- }
- }
- }
- // all other cases: result will fit
-
- run_container_t *ans = src;
- int my_nbr_runs = src->n_runs;
-
- ans->n_runs = 0;
- int k = 0;
- for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
- // ans->runs[k] = src->runs[k]; (would be self-copy)
- ans->n_runs++;
- }
-
- // as with Java implementation, use locals to give self a buffer of depth 1
- rle16_t buffered = MAKE_RLE16(0, 0);
- rle16_t next = buffered;
- if (k < my_nbr_runs) buffered = src->runs[k];
-
- run_container_smart_append_exclusive(
- ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
-
- for (; k < my_nbr_runs; ++k) {
- if (k + 1 < my_nbr_runs) next = src->runs[k + 1];
-
- run_container_smart_append_exclusive(ans, buffered.value,
- buffered.length);
- buffered = next;
- }
-
- *dst = convert_run_to_efficient_container(ans, &return_typecode);
- if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
-
- return return_typecode;
-}
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif
-/* end file src/containers/mixed_negation.c */
/* begin file src/containers/mixed_equal.c */
#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif
-static bool array_container_equal_bitset(const array_container_t* container1,
+bool array_container_equal_bitset(const array_container_t* container1,
const bitset_container_t* container2) {
if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
if (container2->cardinality != container1->cardinality) {
@@ -16316,7 +12581,7 @@ static bool array_container_equal_bitset(const array_container_t* container1,
}
}
int32_t pos = 0;
- int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
uint64_t w = container2->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
@@ -16334,12 +12599,12 @@ static bool array_container_equal_bitset(const array_container_t* container1,
return (pos == container1->cardinality);
}
-static bool run_container_equals_array(const run_container_t* container1,
+bool run_container_equals_array(const run_container_t* container1,
const array_container_t* container2) {
if (run_container_cardinality(container1) != container2->cardinality)
return false;
int32_t pos = 0;
- int i = 0; for (i = 0; i < container1->n_runs; ++i) {
+ for (int i = 0; i < container1->n_runs; ++i) {
const uint32_t run_start = container1->runs[i].value;
const uint32_t le = container1->runs[i].length;
@@ -16356,7 +12621,7 @@ static bool run_container_equals_array(const run_container_t* container1,
return true;
}
-static bool run_container_equals_bitset(const run_container_t* container1,
+bool run_container_equals_bitset(const run_container_t* container1,
const bitset_container_t* container2) {
int run_card = run_container_cardinality(container1);
@@ -16367,7 +12632,7 @@ static bool run_container_equals_bitset(const run_container_t* container1,
return false;
}
- int32_t i; for (i = 0; i < container1->n_runs; i++) {
+ for (int32_t i = 0; i < container1->n_runs; i++) {
uint32_t begin = container1->runs[i].value;
if (container1->runs[i].length) {
uint32_t end = begin + container1->runs[i].length + 1;
@@ -16388,896 +12653,6 @@ static bool run_container_equals_bitset(const run_container_t* container1,
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_equal.c */
-/* begin file src/containers/bitset.c */
-/*
- * bitset.c
- *
- */
-#ifndef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 200809L
-#endif
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
-#endif
-
-extern inline int bitset_container_cardinality(const bitset_container_t *bitset);
-extern inline bool bitset_container_nonzero_cardinality(bitset_container_t *bitset);
-extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
-extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
-extern inline bool bitset_container_get(const bitset_container_t *bitset,
- uint16_t pos);
-extern inline int32_t bitset_container_serialized_size_in_bytes(void);
-extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
-extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
-extern inline bool bitset_container_contains(const bitset_container_t *bitset,
- uint16_t pos);
-
-static void bitset_container_clear(bitset_container_t *bitset) {
- memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
- bitset->cardinality = 0;
-}
-
-static void bitset_container_set_all(bitset_container_t *bitset) {
- memset(bitset->words, INT64_C(-1),
- sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
- bitset->cardinality = (1 << 16);
-}
-
-
-
-/* Create a new bitset. Return NULL in case of failure. */
-static bitset_container_t *bitset_container_create(void) {
- bitset_container_t *bitset =
- (bitset_container_t *)ndpi_malloc(sizeof(bitset_container_t));
-
- if (!bitset) {
- return NULL;
- }
- // sizeof(__m256i) == 32
- bitset->words = (uint64_t *)roaring_bitmap_aligned_malloc(
- 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
- if (!bitset->words) {
- ndpi_free(bitset);
- return NULL;
- }
- bitset_container_clear(bitset);
- return bitset;
-}
-
-/* Copy one container into another. We assume that they are distinct. */
-static void bitset_container_copy(const bitset_container_t *source,
- bitset_container_t *dest) {
- dest->cardinality = source->cardinality;
- memcpy(dest->words, source->words,
- sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
-}
-
-static void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
- uint32_t max, uint16_t step) {
- if (step == 0) return; // refuse to crash
- if ((64 % step) == 0) { // step divides 64
- uint64_t mask = 0; // construct the repeated mask
- uint32_t value; for (value = (min % step); value < 64; value += step) {
- mask |= ((uint64_t)1 << value);
- }
- uint32_t firstword = min / 64;
- uint32_t endword = (max - 1) / 64;
- bitset->cardinality = (max - min + step - 1) / step;
- if (firstword == endword) {
- bitset->words[firstword] |=
- mask & (((~UINT64_C(0)) << (min % 64)) &
- ((~UINT64_C(0)) >> ((~max + 1) % 64)));
- return;
- }
- bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
- uint32_t i; for (i = firstword + 1; i < endword; i++)
- bitset->words[i] = mask;
- bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
- } else {
- uint32_t value; for (value = min; value < max; value += step) {
- bitset_container_add(bitset, value);
- }
- }
-}
-
-/* Free memory. */
-static void bitset_container_free(bitset_container_t *bitset) {
- if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise
- roaring_bitmap_aligned_free(bitset->words);
- bitset->words = NULL; // pedantic
- }
- ndpi_free(bitset);
-}
-
-/* duplicate container. */
-static bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
- bitset_container_t *bitset =
- (bitset_container_t *)ndpi_malloc(sizeof(bitset_container_t));
-
- if (!bitset) {
- return NULL;
- }
- // sizeof(__m256i) == 32
- bitset->words = (uint64_t *)roaring_bitmap_aligned_malloc(
- 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
- if (!bitset->words) {
- ndpi_free(bitset);
- return NULL;
- }
- bitset->cardinality = src->cardinality;
- memcpy(bitset->words, src->words,
- sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
- return bitset;
-}
-
-static void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
- uint32_t end) {
- bitset_set_range(bitset->words, begin, end);
- bitset->cardinality =
- bitset_container_compute_cardinality(bitset); // could be smarter
-}
-
-
-static bool bitset_container_intersect(const bitset_container_t *src_1,
- const bitset_container_t *src_2) {
- // could vectorize, but this is probably already quite fast in practice
- const uint64_t * __restrict__ words_1 = src_1->words;
- const uint64_t * __restrict__ words_2 = src_2->words;
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
- if((words_1[i] & words_2[i]) != 0) return true;
- }
- return false;
-}
-
-
-#ifdef CROARING_IS_X64
-#ifndef WORDS_IN_AVX2_REG
-#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
-#endif
-/* Get the number of bits set (force computation) */
-static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- const uint64_t *words = bitset->words;
- int32_t sum = 0;
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
- sum += hamming(words[i]);
- sum += hamming(words[i + 1]);
- sum += hamming(words[i + 2]);
- sum += hamming(words[i + 3]);
- }
- return sum;
-}
-/* Get the number of bits set (force computation) */
-static int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- if( croaring_avx2() ) {
- return (int) avx2_harley_seal_popcount256(
- (const __m256i *)bitset->words,
- BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
- } else {
- return _scalar_bitset_container_compute_cardinality(bitset);
-
- }
-}
-
-#elif defined(USENEON)
-static int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- uint16x8_t n0 = vdupq_n_u16(0);
- uint16x8_t n1 = vdupq_n_u16(0);
- uint16x8_t n2 = vdupq_n_u16(0);
- uint16x8_t n3 = vdupq_n_u16(0);
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {
- uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]);
- n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));
- uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]);
- n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1))));
- uint64x2_t c2 = vld1q_u64(&bitset->words[i + 4]);
- n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2))));
- uint64x2_t c3 = vld1q_u64(&bitset->words[i + 6]);
- n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3))));
- }
- uint64x2_t n = vdupq_n_u64(0);
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0)));
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1)));
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2)));
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3)));
- return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);
-}
-
-#else // CROARING_IS_X64
-
-/* Get the number of bits set (force computation) */
-static int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- const uint64_t *words = bitset->words;
- int32_t sum = 0;
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
- sum += hamming(words[i]);
- sum += hamming(words[i + 1]);
- sum += hamming(words[i + 2]);
- sum += hamming(words[i + 3]);
- }
- return sum;
-}
-
-#endif // CROARING_IS_X64
-
-#ifdef CROARING_IS_X64
-
-#define BITSET_CONTAINER_FN_REPEAT 8
-#ifndef WORDS_IN_AVX2_REG
-#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
-#endif // WORDS_IN_AVX2_REG
-#define LOOP_SIZE \
- BITSET_CONTAINER_SIZE_IN_WORDS / \
- ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT)
-
-/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
- result to bitsetout */
-// clang-format off
-#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \
- neon_intrinsic, after) \
- static inline int _avx2_bitset_container_##opname##_nocard( \
- const bitset_container_t *src_1, const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \
- const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \
- /* not using the blocking optimization for some reason*/ \
- uint8_t *out = (uint8_t *)dst->words; \
- const int innerloop = 8; \
- size_t i; for (i = 0; \
- i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \
- i += innerloop) { \
- __m256i A1, A2, AO; \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)out, AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 32), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 64), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 96), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 128), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 160), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 192), AO); \
- A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \
- A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \
- AO = avx_intrinsic(A2, A1); \
- _mm256_storeu_si256((__m256i *)(out + 224), AO); \
- out += 256; \
- words_1 += 256; \
- words_2 += 256; \
- } \
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
- return dst->cardinality; \
- }
-
-#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \
- neon_intrinsic, after) \
- /* next, a version that updates cardinality*/ \
- static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \
- const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \
- __m256i *out = (__m256i *)dst->words; \
- dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \
- words_2, words_1, out, \
- BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
- return dst->cardinality; \
- } \
-
-#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \
- neon_intrinsic, after) \
- /* next, a version that just computes the cardinality*/ \
- static inline int _avx2_bitset_container_##opname##_justcard( \
- const bitset_container_t *src_1, const bitset_container_t *src_2) { \
- const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \
- const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \
- return (int)avx2_harley_seal_popcount256_##opname( \
- data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
- }
-
-
-// we duplicate the function because other containers use the "or" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-// we duplicate the function because other containers use the "intersection" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-// we duplicate the function because other containers use the "or" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-// we duplicate the function because other containers use the "intersection" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-// we duplicate the function because other containers use the "or" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-// we duplicate the function because other containers use the "intersection" term, makes API more consistent
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
-
-
-#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \
- neon_intrinsic) \
- static inline int _scalar_bitset_container_##opname(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t *__restrict__ words_1 = src_1->words; \
- const uint64_t *__restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- int32_t sum = 0; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
- const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
- word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
- out[i] = word_1; \
- out[i + 1] = word_2; \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
- } \
- dst->cardinality = sum; \
- return dst->cardinality; \
- } \
- static inline int _scalar_bitset_container_##opname##_nocard( \
- const bitset_container_t *src_1, const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t *__restrict__ words_1 = src_1->words; \
- const uint64_t *__restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
- out[i] = (words_1[i])opsymbol(words_2[i]); \
- } \
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
- return dst->cardinality; \
- } \
- static inline int _scalar_bitset_container_##opname##_justcard( \
- const bitset_container_t *src_1, const bitset_container_t *src_2) { \
- const uint64_t *__restrict__ words_1 = src_1->words; \
- const uint64_t *__restrict__ words_2 = src_2->words; \
- int32_t sum = 0; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
- const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
- word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
- } \
- return sum; \
- }
-
-// we duplicate the function because other containers use the "or" term, makes API more consistent
-SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
-SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
-
-// we duplicate the function because other containers use the "intersection" term, makes API more consistent
-SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
-SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
-
-SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
-SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
-
-
-#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
- static int bitset_container_##opname(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- if ( croaring_avx2() ) { \
- return _avx2_bitset_container_##opname(src_1, src_2, dst); \
- } else { \
- return _scalar_bitset_container_##opname(src_1, src_2, dst); \
- } \
- } \
- static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- if ( croaring_avx2() ) { \
- return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \
- } else { \
- return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \
- } \
- } \
- static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2) { \
- if ((croaring_detect_supported_architectures() & CROARING_AVX2) == \
- CROARING_AVX2) { \
- return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \
- } else { \
- return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \
- } \
- }
-
-
-
-#elif defined(USENEON)
-
-#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
-static int bitset_container_##opname(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- uint16x8_t n0 = vdupq_n_u16(0); \
- uint16x8_t n1 = vdupq_n_u16(0); \
- uint16x8_t n2 = vdupq_n_u16(0); \
- uint16x8_t n3 = vdupq_n_u16(0); \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
- uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
- vld1q_u64(&words_2[i + 0])); \
- n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
- vst1q_u64(&out[i + 0], c0); \
- uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
- vld1q_u64(&words_2[i + 2])); \
- n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
- vst1q_u64(&out[i + 2], c1); \
- uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
- vld1q_u64(&words_2[i + 4])); \
- n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
- vst1q_u64(&out[i + 4], c2); \
- uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
- vld1q_u64(&words_2[i + 6])); \
- n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
- vst1q_u64(&out[i + 6], c3); \
- } \
- uint64x2_t n = vdupq_n_u64(0); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
- dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \
- return dst->cardinality; \
-} \
-static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
- vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
- vld1q_u64(&words_2[i + 0]))); \
- vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
- vld1q_u64(&words_2[i + 2]))); \
- vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
- vld1q_u64(&words_2[i + 4]))); \
- vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
- vld1q_u64(&words_2[i + 6]))); \
- } \
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
- return dst->cardinality; \
-} \
-static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- uint16x8_t n0 = vdupq_n_u16(0); \
- uint16x8_t n1 = vdupq_n_u16(0); \
- uint16x8_t n2 = vdupq_n_u16(0); \
- uint16x8_t n3 = vdupq_n_u16(0); \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
- uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
- vld1q_u64(&words_2[i + 0])); \
- n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
- uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
- vld1q_u64(&words_2[i + 2])); \
- n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
- uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
- vld1q_u64(&words_2[i + 4])); \
- n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
- uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
- vld1q_u64(&words_2[i + 6])); \
- n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
- } \
- uint64x2_t n = vdupq_n_u64(0); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
- n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
- return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \
-}
-
-#else
-
-#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
-static int bitset_container_##opname(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- int32_t sum = 0; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
- const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
- word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
- out[i] = word_1; \
- out[i + 1] = word_2; \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
- } \
- dst->cardinality = sum; \
- return dst->cardinality; \
-} \
-static int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2, \
- bitset_container_t *dst) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- uint64_t *out = dst->words; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
- out[i] = (words_1[i])opsymbol(words_2[i]); \
- } \
- dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
- return dst->cardinality; \
-} \
-static int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
- const bitset_container_t *src_2) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
- const uint64_t * __restrict__ words_2 = src_2->words; \
- int32_t sum = 0; \
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
- const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
- word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
- } \
- return sum; \
-}
-
-#endif // CROARING_IS_X64
-
-// we duplicate the function because other containers use the "or" term, makes API more consistent
-BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
-BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
-
-// we duplicate the function because other containers use the "intersection" term, makes API more consistent
-BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
-BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
-
-BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
-BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
-// clang-format On
-
-
-static int bitset_container_to_uint32_array(
- uint32_t *out,
- const bitset_container_t *bc,
- uint32_t base
-){
-#ifdef CROARING_IS_X64
- if(( croaring_avx2() ) && (bc->cardinality >= 8192)) // heuristic
- return (int) bitset_extract_setbits_avx2(bc->words,
- BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
- else
- return (int) bitset_extract_setbits(bc->words,
- BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
-#else
- return (int) bitset_extract_setbits(bc->words,
- BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
-#endif
-}
-
-/*
- * Print this container using printf (useful for debugging).
- */
-static void bitset_container_printf(const bitset_container_t * v) {
- printf("{");
- uint32_t base = 0;
- bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
- uint64_t w = v->words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- if(iamfirst) {// predicted to be false
- printf("%u",base + r);
- iamfirst = false;
- } else {
- printf(",%u",base + r);
- }
- w ^= t;
- }
- base += 64;
- }
- printf("}");
-}
-
-
-/*
- * Print this container using printf as a comma-separated list of 32-bit integers starting at base.
- */
-static void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
- bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
- uint64_t w = v->words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- if(iamfirst) {// predicted to be false
- printf("%u", r + base);
- iamfirst = false;
- } else {
- printf(",%u",r + base);
- }
- w ^= t;
- }
- base += 64;
- }
-}
-
-
-// TODO: use the fast lower bound, also
-static int bitset_container_number_of_runs(bitset_container_t *bc) {
- int num_runs = 0;
- uint64_t next_word = bc->words[0];
-
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
- uint64_t word = next_word;
- next_word = bc->words[i+1];
- num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
- }
-
- uint64_t word = next_word;
- num_runs += hamming((~word) & (word << 1));
- if((word & 0x8000000000000000ULL) != 0)
- num_runs++;
- return num_runs;
-}
-
-
-static int32_t bitset_container_write(const bitset_container_t *container,
- char *buf) {
- memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
- return bitset_container_size_in_bytes(container);
-}
-
-
-static int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
- const char *buf) {
- container->cardinality = cardinality;
- memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
- return bitset_container_size_in_bytes(container);
-}
-
-static bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
- int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
- uint64_t w = cont->words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- if(!iterator(r + base, ptr)) return false;
- w ^= t;
- }
- base += 64;
- }
- return true;
-}
-
-static bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
- int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
- uint64_t w = cont->words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
- w ^= t;
- }
- base += 64;
- }
- return true;
-}
-
-#ifdef CROARING_IS_X64
-CROARING_TARGET_AVX2
-static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
- const __m256i *ptr1 = (const __m256i*)container1->words;
- const __m256i *ptr2 = (const __m256i*)container2->words;
- size_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
- __m256i r1 = _mm256_load_si256(ptr1+i);
- __m256i r2 = _mm256_load_si256(ptr2+i);
- int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
- if ((uint32_t)mask != UINT32_MAX) {
- return false;
- }
- }
- return true;
-}
-CROARING_UNTARGET_REGION
-#endif // CROARING_IS_X64
-
-static bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
- if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
- if(container1->cardinality != container2->cardinality) {
- return false;
- }
- if (container1->cardinality == INT32_C(0x10000)) {
- return true;
- }
- }
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
- return _avx2_bitset_container_equals(container1, container2);
- }
-#endif
- return memcmp(container1->words,
- container2->words,
- BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0;
-}
-
-static bool bitset_container_is_subset(const bitset_container_t *container1,
- const bitset_container_t *container2) {
- if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
- if(container1->cardinality > container2->cardinality) {
- return false;
- }
- }
- int32_t i ; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
- if((container1->words[i] & container2->words[i]) != container1->words[i]) {
- return false;
- }
- }
- return true;
-}
-
-static bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) {
- int card = bitset_container_cardinality(container);
- if(rank >= *start_rank + card) {
- *start_rank += card;
- return false;
- }
- const uint64_t *words = container->words;
- int32_t size;
- int i = 0; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
- size = hamming(words[i]);
- if(rank <= *start_rank + size) {
- uint64_t w = container->words[i];
- uint16_t base = i*64;
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- if(*start_rank == rank) {
- *element = r+base;
- return true;
- }
- w ^= t;
- *start_rank += 1;
- }
- }
- else
- *start_rank += size;
- }
- assert(false);
- __builtin_unreachable();
-}
-
-
-/* Returns the smallest value (assumes not empty) */
-static uint16_t bitset_container_minimum(const bitset_container_t *container) {
- int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
- uint64_t w = container->words[i];
- if (w != 0) {
- int r = __builtin_ctzll(w);
- return r + i * 64;
- }
- }
- return UINT16_MAX;
-}
-
-/* Returns the largest value (assumes not empty) */
-static uint16_t bitset_container_maximum(const bitset_container_t *container) {
- int32_t i; for (i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
- uint64_t w = container->words[i];
- if (w != 0) {
- int r = __builtin_clzll(w);
- return i * 64 + 63 - r;
- }
- }
- return 0;
-}
-
-/* Returns the number of values equal or smaller than x */
-static int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
- // credit: aqrit
- int sum = 0;
- int i = 0, end;
- for (end = x / 64; i < end; i++){
- sum += hamming(container->words[i]);
- }
- uint64_t lastword = container->words[i];
- uint64_t lastpos = UINT64_C(1) << (x % 64);
- uint64_t mask = lastpos + lastpos - 1; // smear right
- sum += hamming(lastword & mask);
- return sum;
-}
-
-/* Returns the index of the first value equal or larger than x, or -1 */
-static int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
- uint32_t x32 = x;
- uint32_t k = x32 / 64;
- uint64_t word = container->words[k];
- const int diff = x32 - k * 64; // in [0,64)
- word = (word >> diff) << diff; // a mask is faster, but we don't care
- while(word == 0) {
- k++;
- if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
- word = container->words[k];
- }
- return k * 64 + __builtin_ctzll(word);
-}
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif
-/* end file src/containers/bitset.c */
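
The rank computation above (credit aqrit) popcounts every word strictly before the word holding x, then popcounts that word under a mask keeping bits 0 through x mod 64 (the "smear right" step). A standalone sketch of the same arithmetic, with __builtin_popcountll standing in for hamming():

#include <stdint.h>
#include <stdio.h>

/* Number of set bits with position <= x in an array of 64-bit words. */
static int rank_upto(const uint64_t *words, uint16_t x) {
    int sum = 0;
    int i = 0;
    for (int end = x / 64; i < end; i++) {
        sum += __builtin_popcountll(words[i]);   /* full preceding words */
    }
    uint64_t lastpos = UINT64_C(1) << (x % 64);
    uint64_t mask = lastpos + lastpos - 1;       /* smear right: keep bits 0..(x % 64) */
    return sum + __builtin_popcountll(words[i] & mask);
}

int main(void) {
    uint64_t words[2] = {0, 0};
    words[0] |= UINT64_C(1) << 3;   /* value 3  */
    words[0] |= UINT64_C(1) << 40;  /* value 40 */
    words[1] |= UINT64_C(1) << 2;   /* value 66 */
    printf("%d %d %d\n", rank_upto(words, 3),    /* 1 */
                         rank_upto(words, 64),   /* 2 */
                         rank_upto(words, 66));  /* 3 */
    return 0;
}
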
/* begin file src/containers/mixed_intersection.c */
/*
* mixed_intersection.c
@@ -17291,7 +12666,7 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the intersection of src_1 and src_2 and write the result to
* dst. */
-static void array_bitset_container_intersection(const array_container_t *src_1,
+void array_bitset_container_intersection(const array_container_t *src_1,
const bitset_container_t *src_2,
array_container_t *dst) {
if (dst->capacity < src_1->cardinality) {
@@ -17299,7 +12674,7 @@ static void array_bitset_container_intersection(const array_container_t *src_1,
}
int32_t newcard = 0; // dst could be src_1
const int32_t origcard = src_1->cardinality;
- int i = 0; for (i = 0; i < origcard; ++i) {
+ for (int i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
// this branchless approach is much faster...
dst->array[newcard] = key;
@@ -17321,11 +12696,11 @@ static void array_bitset_container_intersection(const array_container_t *src_1,
}
/* Compute the size of the intersection of src_1 and src_2. */
-static int array_bitset_container_intersection_cardinality(
+int array_bitset_container_intersection_cardinality(
const array_container_t *src_1, const bitset_container_t *src_2) {
int32_t newcard = 0;
const int32_t origcard = src_1->cardinality;
- int i = 0; for (i = 0; i < origcard; ++i) {
+ for (int i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
newcard += bitset_container_contains(src_2, key);
}
@@ -17333,10 +12708,10 @@ static int array_bitset_container_intersection_cardinality(
}
-static bool array_bitset_container_intersect(const array_container_t *src_1,
+bool array_bitset_container_intersect(const array_container_t *src_1,
const bitset_container_t *src_2) {
const int32_t origcard = src_1->cardinality;
- int i = 0; for (i = 0; i < origcard; ++i) {
+ for (int i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
if(bitset_container_contains(src_2, key)) return true;
}
@@ -17346,7 +12721,7 @@ static bool array_bitset_container_intersect(const array_container_t *src_1,
/* Compute the intersection of src_1 and src_2 and write the result to
* dst. It is allowed for dst to be equal to src_1. We assume that dst is a
* valid container. */
-static void array_run_container_intersection(const array_container_t *src_1,
+void array_run_container_intersection(const array_container_t *src_1,
const run_container_t *src_2,
array_container_t *dst) {
if (run_container_is_full(src_2)) {
@@ -17390,7 +12765,7 @@ static void array_run_container_intersection(const array_container_t *src_1,
* *dst. If the result is true then the result is a bitset_container_t
* otherwise is a array_container_t. If *dst == src_2, an in-place processing
* is attempted.*/
-static bool run_bitset_container_intersection(
+bool run_bitset_container_intersection(
const run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -17410,10 +12785,10 @@ static bool run_bitset_container_intersection(
if (*dst == NULL) {
return false;
}
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
- uint32_t endofrun = (uint32_t)rle.value + rle.length, runValue;
- for (runValue = rle.value; runValue <= endofrun;
+ uint32_t endofrun = (uint32_t)rle.value + rle.length;
+ for (uint32_t runValue = rle.value; runValue <= endofrun;
++runValue) {
answer->array[answer->cardinality] = (uint16_t)runValue;
answer->cardinality +=
@@ -17425,7 +12800,7 @@ static bool run_bitset_container_intersection(
if (*dst == src_2) { // we attempt in-place
bitset_container_t *answer = CAST_bitset(*dst);
uint32_t start = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
const rle16_t rle = src_1->runs[rlepos];
uint32_t end = rle.value;
bitset_reset_range(src_2->words, start, end);
@@ -17454,7 +12829,7 @@ static bool run_bitset_container_intersection(
return true;
}
uint32_t start = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
const rle16_t rle = src_1->runs[rlepos];
uint32_t end = rle.value;
bitset_reset_range(answer->words, start, end);
@@ -17479,7 +12854,7 @@ static bool run_bitset_container_intersection(
}
/* Compute the size of the intersection between src_1 and src_2 . */
-static int array_run_container_intersection_cardinality(const array_container_t *src_1,
+int array_run_container_intersection_cardinality(const array_container_t *src_1,
const run_container_t *src_2) {
if (run_container_is_full(src_2)) {
return src_1->cardinality;
@@ -17514,13 +12889,13 @@ static int array_run_container_intersection_cardinality(const array_container_t
/* Compute the intersection between src_1 and src_2
**/
-static int run_bitset_container_intersection_cardinality(
+int run_bitset_container_intersection_cardinality(
const run_container_t *src_1, const bitset_container_t *src_2) {
if (run_container_is_full(src_1)) {
return bitset_container_cardinality(src_2);
}
int answer = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
answer +=
bitset_lenrange_cardinality(src_2->words, rle.value, rle.length);
@@ -17529,7 +12904,7 @@ static int run_bitset_container_intersection_cardinality(
}
-static bool array_run_container_intersect(const array_container_t *src_1,
+bool array_run_container_intersect(const array_container_t *src_1,
const run_container_t *src_2) {
if( run_container_is_full(src_2) ) {
return !array_container_empty(src_1);
@@ -17562,12 +12937,12 @@ static bool array_run_container_intersect(const array_container_t *src_1,
/* Compute the intersection between src_1 and src_2
**/
-static bool run_bitset_container_intersect(const run_container_t *src_1,
+bool run_bitset_container_intersect(const run_container_t *src_1,
const bitset_container_t *src_2) {
if( run_container_is_full(src_1) ) {
return !bitset_container_empty(src_2);
}
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
if(!bitset_lenrange_empty(src_2->words, rle.value,rle.length)) return true;
}
@@ -17579,7 +12954,7 @@ static bool run_bitset_container_intersect(const run_container_t *src_1,
* to *dst. If the return function is true, the result is a bitset_container_t
* otherwise is a array_container_t.
*/
-static bool bitset_bitset_container_intersection(
+bool bitset_bitset_container_intersection(
const bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -17602,7 +12977,7 @@ static bool bitset_bitset_container_intersection(
return false; // not a bitset
}
-static bool bitset_bitset_container_intersection_inplace(
+bool bitset_bitset_container_intersection_inplace(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -17627,20 +13002,377 @@ static bool bitset_bitset_container_intersection_inplace(
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_intersection.c */
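
For illustration, a minimal usage sketch of array_bitset_container_intersection() above; it is not part of the amalgamation and assumes the internal constructors and setters declared elsewhere in this file (array_container_create(), array_container_add(), bitset_container_create(), bitset_container_set() and the matching _free() functions) behave as their names suggest:

/* Intersect a small array container with a bitset container. */
static int demo_array_bitset_intersection(void) {
    array_container_t *a = array_container_create();
    bitset_container_t *b = bitset_container_create();
    for (uint16_t v = 0; v < 100; v += 2) array_container_add(a, v);  /* even values below 100 */
    for (uint16_t v = 50; v < 150; v++) bitset_container_set(b, v);   /* the range [50,150) */

    array_container_t *out = array_container_create();
    array_bitset_container_intersection(a, b, out);  /* even values in [50,100) */

    int card = out->cardinality;  /* expected: 25 */
    array_container_free(a);
    array_container_free(out);
    bitset_container_free(b);
    return card;
}
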
+/* begin file src/containers/mixed_negation.c */
+/*
+ * mixed_negation.c
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// TODO: make simplified and optimized negation code across
+// the full range.
+
+/* Negation across the entire range of the container.
+ * Compute the negation of src and write the result
+ * to *dst. The complement of a
+ * sufficiently sparse set will always be dense and hence a bitmap.
+ * We assume that dst is pre-allocated and a valid bitset container.
+ * There can be no in-place version.
+ */
+void array_container_negation(const array_container_t *src,
+ bitset_container_t *dst) {
+ uint64_t card = UINT64_C(1 << 16);
+ bitset_container_set_all(dst);
+
+ if (src->cardinality == 0) {
+ return;
+ }
+
+ dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array,
+ (uint64_t)src->cardinality);
+}
+
+/* Negation across the entire range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation(
+ const bitset_container_t *src, container_t **dst
+){
+ return bitset_container_negation_range(src, 0, (1 << 16), dst);
+}
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_inplace(
+ bitset_container_t *src, container_t **dst
+){
+ return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
+}
+
+/* Negation across the entire range of container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined in containers.h.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation(const run_container_t *src, container_t **dst) {
+ return run_container_negation_range(src, 0, (1 << 16), dst);
+}
+
+/*
+ * Same as run_container_negation except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_inplace(run_container_t *src, container_t **dst) {
+ return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
+}
+
+/* Negation across a range of the container.
+ * Compute the negation of src and write the result
+ * to *dst. Returns true if the result is a bitset container
+ * and false for an array container. *dst is not preallocated.
+ */
+bool array_container_negation_range(
+ const array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ /* close port of the Java implementation */
+ if (range_start >= range_end) {
+ *dst = array_container_clone(src);
+ return false;
+ }
+
+ int32_t start_index =
+ binarySearch(src->array, src->cardinality, (uint16_t)range_start);
+ if (start_index < 0) start_index = -start_index - 1;
+
+ int32_t last_index =
+ binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
+ if (last_index < 0) last_index = -last_index - 2;
+
+ const int32_t current_values_in_range = last_index - start_index + 1;
+ const int32_t span_to_be_flipped = range_end - range_start;
+ const int32_t new_values_in_range =
+ span_to_be_flipped - current_values_in_range;
+ const int32_t cardinality_change =
+ new_values_in_range - current_values_in_range;
+ const int32_t new_cardinality = src->cardinality + cardinality_change;
+
+ if (new_cardinality > DEFAULT_MAX_SIZE) {
+ bitset_container_t *temp = bitset_container_from_array(src);
+ bitset_flip_range(temp->words, (uint32_t)range_start,
+ (uint32_t)range_end);
+ temp->cardinality = new_cardinality;
+ *dst = temp;
+ return true;
+ }
+
+ array_container_t *arr =
+ array_container_create_given_capacity(new_cardinality);
+ *dst = (container_t *)arr;
+ if(new_cardinality == 0) {
+ arr->cardinality = new_cardinality;
+ return false; // we are done.
+ }
+ // copy stuff before the active area
+ memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
+
+ // work on the range
+ int32_t out_pos = start_index, in_pos = start_index;
+ int32_t val_in_range = range_start;
+ for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
+ if ((uint16_t)val_in_range != src->array[in_pos]) {
+ arr->array[out_pos++] = (uint16_t)val_in_range;
+ } else {
+ ++in_pos;
+ }
+ }
+ for (; val_in_range < range_end; ++val_in_range)
+ arr->array[out_pos++] = (uint16_t)val_in_range;
+
+ // content after the active range
+ memcpy(arr->array + out_pos, src->array + (last_index + 1),
+ (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
+ arr->cardinality = new_cardinality;
+ return false;
+}
+
+/* Even when the result would fit, it is unclear how to make an
+ * inplace version without inefficient copying.
+ */
+
+bool array_container_negation_range_inplace(
+ array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ bool ans = array_container_negation_range(src, range_start, range_end, dst);
+ // TODO : try a real inplace version
+ array_container_free(src);
+ return ans;
+}
+
+/* Negation across a range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation_range(
+ const bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ // TODO maybe consider density-based estimate
+ // and sometimes build result directly as array, with
+ // conversion back to bitset if wrong. Or determine
+ // actual result cardinality, then go directly for the known final cont.
+
+ // keep computation using bitsets as long as possible.
+ bitset_container_t *t = bitset_container_clone(src);
+ bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end);
+ t->cardinality = bitset_container_compute_cardinality(t);
+
+ if (t->cardinality > DEFAULT_MAX_SIZE) {
+ *dst = t;
+ return true;
+ } else {
+ *dst = array_container_from_bitset(t);
+ bitset_container_free(t);
+ return false;
+ }
+}
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_range_inplace(
+ bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
+ src->cardinality = bitset_container_compute_cardinality(src);
+ if (src->cardinality > DEFAULT_MAX_SIZE) {
+ *dst = src;
+ return true;
+ }
+ *dst = array_container_from_bitset(src);
+ bitset_container_free(src);
+ return false;
+}
+
+/* Negation across a range of container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined in containers.h.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation_range(
+ const run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ uint8_t return_typecode;
+
+ // follows the Java implementation
+ if (range_end <= range_start) {
+ *dst = run_container_clone(src);
+ return RUN_CONTAINER_TYPE;
+ }
+
+ run_container_t *ans = run_container_create_given_capacity(
+ src->n_runs + 1);
+ int k = 0;
+ for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
+ ans->runs[k] = src->runs[k];
+ ans->n_runs++;
+ }
+
+ run_container_smart_append_exclusive(
+ ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
+
+ for (; k < src->n_runs; ++k) {
+ run_container_smart_append_exclusive(ans, src->runs[k].value,
+ src->runs[k].length);
+ }
+
+ *dst = convert_run_to_efficient_container(ans, &return_typecode);
+ if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
+
+ return return_typecode;
+}
+
+/*
+ * Same as run_container_negation except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_range_inplace(
+ run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ uint8_t return_typecode;
+
+ if (range_end <= range_start) {
+ *dst = src;
+ return RUN_CONTAINER_TYPE;
+ }
+
+ // TODO: efficient special case when range is 0 to 65535 inclusive
+
+ if (src->capacity == src->n_runs) {
+ // no excess room. More checking to see if result can fit
+ bool last_val_before_range = false;
+ bool first_val_in_range = false;
+ bool last_val_in_range = false;
+ bool first_val_past_range = false;
+
+ if (range_start > 0)
+ last_val_before_range =
+ run_container_contains(src, (uint16_t)(range_start - 1));
+ first_val_in_range = run_container_contains(src, (uint16_t)range_start);
+
+ if (last_val_before_range == first_val_in_range) {
+ last_val_in_range =
+ run_container_contains(src, (uint16_t)(range_end - 1));
+ if (range_end != 0x10000)
+ first_val_past_range =
+ run_container_contains(src, (uint16_t)range_end);
+
+ if (last_val_in_range ==
+ first_val_past_range) { // no space for inplace
+ int ans = run_container_negation_range(src, range_start,
+ range_end, dst);
+ run_container_free(src);
+ return ans;
+ }
+ }
+ }
+ // all other cases: result will fit
+
+ run_container_t *ans = src;
+ int my_nbr_runs = src->n_runs;
+
+ ans->n_runs = 0;
+ int k = 0;
+ for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
+ // ans->runs[k] = src->runs[k]; (would be self-copy)
+ ans->n_runs++;
+ }
+
+ // as with the Java implementation, use locals to give ourselves a buffer of depth 1
+ rle16_t buffered = MAKE_RLE16(0, 0);
+ rle16_t next = buffered;
+ if (k < my_nbr_runs) buffered = src->runs[k];
+
+ run_container_smart_append_exclusive(
+ ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
+
+ for (; k < my_nbr_runs; ++k) {
+ if (k + 1 < my_nbr_runs) next = src->runs[k + 1];
+
+ run_container_smart_append_exclusive(ans, buffered.value,
+ buffered.length);
+ buffered = next;
+ }
+
+ *dst = convert_run_to_efficient_container(ans, &return_typecode);
+ if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
+
+ return return_typecode;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_negation.c */
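
The array negation above hinges on simple cardinality bookkeeping: flipping a range of span S that currently holds k values leaves S - k values inside that range, so the container cardinality changes by S - 2k, and the result is promoted to a bitset only when the new cardinality exceeds the array/bitset threshold (DEFAULT_MAX_SIZE, 4096 in CRoaring). A standalone sketch of that decision:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_DEFAULT_MAX_SIZE 4096  /* the array/bitset threshold used by CRoaring */

/* Mirrors the bookkeeping in array_container_negation_range(): given the
 * current cardinality, the number of values already inside the flipped range
 * and the span of that range, compute the new cardinality and report whether
 * the result should be promoted to a bitset container. */
static bool negation_range_needs_bitset(int32_t cardinality, int32_t values_in_range,
                                        int32_t span, int32_t *new_cardinality) {
    int32_t new_values_in_range = span - values_in_range;
    *new_cardinality = cardinality + (new_values_in_range - values_in_range);
    return *new_cardinality > DEMO_DEFAULT_MAX_SIZE;
}

int main(void) {
    int32_t c;
    /* 100 values, 10 of them inside a span of 5000: the flipped range is dense.  */
    printf("%d %d\n", negation_range_needs_bitset(100, 10, 5000, &c), c);  /* 1 5080 */
    /* 100 values, 90 of them inside a span of 100: the result stays sparse.      */
    printf("%d %d\n", negation_range_needs_bitset(100, 90, 100, &c), c);   /* 0 20   */
    return 0;
}
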
/* begin file src/containers/mixed_subset.c */
#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif
-static bool array_container_is_subset_bitset(const array_container_t* container1,
+bool array_container_is_subset_bitset(const array_container_t* container1,
const bitset_container_t* container2) {
if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
if (container2->cardinality < container1->cardinality) {
return false;
}
}
- int i = 0; for (i = 0; i < container1->cardinality; ++i) {
+ for (int i = 0; i < container1->cardinality; ++i) {
if (!bitset_container_contains(container2, container1->array[i])) {
return false;
}
@@ -17648,19 +13380,19 @@ static bool array_container_is_subset_bitset(const array_container_t* container1
return true;
}
-static bool run_container_is_subset_array(const run_container_t* container1,
+bool run_container_is_subset_array(const run_container_t* container1,
const array_container_t* container2) {
if (run_container_cardinality(container1) > container2->cardinality)
return false;
int32_t start_pos = -1, stop_pos = -1;
- int i = 0; for (i = 0; i < container1->n_runs; ++i) {
+ for (int i = 0; i < container1->n_runs; ++i) {
int32_t start = container1->runs[i].value;
int32_t stop = start + container1->runs[i].length;
start_pos = advanceUntil(container2->array, stop_pos,
container2->cardinality, start);
stop_pos = advanceUntil(container2->array, stop_pos,
container2->cardinality, stop);
- if (start_pos == container2->cardinality) {
+ if (stop_pos == container2->cardinality) {
return false;
} else if (stop_pos - start_pos != stop - start ||
container2->array[start_pos] != start ||
@@ -17671,7 +13403,7 @@ static bool run_container_is_subset_array(const run_container_t* container1,
return true;
}
-static bool array_container_is_subset_run(const array_container_t* container1,
+bool array_container_is_subset_run(const array_container_t* container1,
const run_container_t* container2) {
if (container1->cardinality > run_container_cardinality(container2))
return false;
@@ -17694,7 +13426,7 @@ static bool array_container_is_subset_run(const array_container_t* container1,
}
}
-static bool run_container_is_subset_bitset(const run_container_t* container1,
+bool run_container_is_subset_bitset(const run_container_t* container1,
const bitset_container_t* container2) {
// todo: this code could be much faster
if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
@@ -17708,10 +13440,10 @@ static bool run_container_is_subset_bitset(const run_container_t* container1,
return false;
}
}
- int i = 0; for (i = 0; i < container1->n_runs; ++i) {
+ for (int i = 0; i < container1->n_runs; ++i) {
uint32_t run_start = container1->runs[i].value;
uint32_t le = container1->runs[i].length;
- uint32_t j; for (j = run_start; j <= run_start + le; ++j) {
+ for (uint32_t j = run_start; j <= run_start + le; ++j) {
if (!bitset_container_contains(container2, j)) {
return false;
}
@@ -17720,7 +13452,7 @@ static bool run_container_is_subset_bitset(const run_container_t* container1,
return true;
}
-static bool bitset_container_is_subset_run(const bitset_container_t* container1,
+bool bitset_container_is_subset_run(const bitset_container_t* container1,
const run_container_t* container2) {
// todo: this code could be much faster
if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
@@ -17768,6 +13500,305 @@ static bool bitset_container_is_subset_run(const bitset_container_t* container1,
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_subset.c */
+/* begin file src/containers/mixed_union.c */
+/*
+ * mixed_union.c
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. */
+void array_bitset_container_union(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst) {
+ if (src_2 != dst) bitset_container_copy(src_2, dst);
+ dst->cardinality = (int32_t)bitset_set_list_withcard(
+ dst->words, dst->cardinality, src_1->array, src_1->cardinality);
+}
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
+void array_bitset_container_lazy_union(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst) {
+ if (src_2 != dst) bitset_container_copy(src_2, dst);
+ bitset_set_list(dst->words, src_1->array, src_1->cardinality);
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+}
+
+void run_bitset_container_union(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst) {
+ assert(!run_container_is_full(src_1)); // catch this case upstream
+ if (src_2 != dst) bitset_container_copy(src_2, dst);
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+ bitset_set_lenrange(dst->words, rle.value, rle.length);
+ }
+ dst->cardinality = bitset_container_compute_cardinality(dst);
+}
+
+void run_bitset_container_lazy_union(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst) {
+ assert(!run_container_is_full(src_1)); // catch this case upstream
+ if (src_2 != dst) bitset_container_copy(src_2, dst);
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+ bitset_set_lenrange(dst->words, rle.value, rle.length);
+ }
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+}
+
+// why do we leave the result as a run container??
+void array_run_container_union(const array_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst) {
+ if (run_container_is_full(src_2)) {
+ run_container_copy(src_2, dst);
+ return;
+ }
+ // TODO: see whether the "2*" is spurious
+ run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
+ int32_t rlepos = 0;
+ int32_t arraypos = 0;
+ rle16_t previousrle;
+ if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
+ previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
+ rlepos++;
+ } else {
+ previousrle =
+ run_container_append_value_first(dst, src_1->array[arraypos]);
+ arraypos++;
+ }
+ while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
+ if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
+ run_container_append(dst, src_2->runs[rlepos], &previousrle);
+ rlepos++;
+ } else {
+ run_container_append_value(dst, src_1->array[arraypos],
+ &previousrle);
+ arraypos++;
+ }
+ }
+ if (arraypos < src_1->cardinality) {
+ while (arraypos < src_1->cardinality) {
+ run_container_append_value(dst, src_1->array[arraypos],
+ &previousrle);
+ arraypos++;
+ }
+ } else {
+ while (rlepos < src_2->n_runs) {
+ run_container_append(dst, src_2->runs[rlepos], &previousrle);
+ rlepos++;
+ }
+ }
+}
+
+void array_run_container_inplace_union(const array_container_t *src_1,
+ run_container_t *src_2) {
+ if (run_container_is_full(src_2)) {
+ return;
+ }
+ const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
+ const int32_t neededcapacity = maxoutput + src_2->n_runs;
+ if (src_2->capacity < neededcapacity)
+ run_container_grow(src_2, neededcapacity, true);
+ memmove(src_2->runs + maxoutput, src_2->runs,
+ src_2->n_runs * sizeof(rle16_t));
+ rle16_t *inputsrc2 = src_2->runs + maxoutput;
+ int32_t rlepos = 0;
+ int32_t arraypos = 0;
+ int src2nruns = src_2->n_runs;
+ src_2->n_runs = 0;
+
+ rle16_t previousrle;
+
+ if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
+ previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
+ rlepos++;
+ } else {
+ previousrle =
+ run_container_append_value_first(src_2, src_1->array[arraypos]);
+ arraypos++;
+ }
+
+ while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
+ if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
+ run_container_append(src_2, inputsrc2[rlepos], &previousrle);
+ rlepos++;
+ } else {
+ run_container_append_value(src_2, src_1->array[arraypos],
+ &previousrle);
+ arraypos++;
+ }
+ }
+ if (arraypos < src_1->cardinality) {
+ while (arraypos < src_1->cardinality) {
+ run_container_append_value(src_2, src_1->array[arraypos],
+ &previousrle);
+ arraypos++;
+ }
+ } else {
+ while (rlepos < src2nruns) {
+ run_container_append(src_2, inputsrc2[rlepos], &previousrle);
+ rlepos++;
+ }
+ }
+}
+
+bool array_array_container_union(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ int totalCardinality = src_1->cardinality + src_2->cardinality;
+ if (totalCardinality <= DEFAULT_MAX_SIZE) {
+ *dst = array_container_create_given_capacity(totalCardinality);
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
+ }
+ *dst = bitset_container_create();
+ bool returnval = true; // expect a bitset
+ if (*dst != NULL) {
+ bitset_container_t *ourbitset = CAST_bitset(*dst);
+ bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
+ ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
+ ourbitset->words, src_1->cardinality, src_2->array,
+ src_2->cardinality);
+ if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
+ // need to convert!
+ *dst = array_container_from_bitset(ourbitset);
+ bitset_container_free(ourbitset);
+ returnval = false; // not going to be a bitset
+ }
+ }
+ return returnval;
+}
+
+bool array_array_container_inplace_union(
+ array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ int totalCardinality = src_1->cardinality + src_2->cardinality;
+ *dst = NULL;
+ if (totalCardinality <= DEFAULT_MAX_SIZE) {
+ if(src_1->capacity < totalCardinality) {
+ *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
+ } else {
+ memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
+ src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
+ src_2->array, src_2->cardinality, src_1->array);
+ return false; // not a bitset
+ }
+ }
+ *dst = bitset_container_create();
+ bool returnval = true; // expect a bitset
+ if (*dst != NULL) {
+ bitset_container_t *ourbitset = CAST_bitset(*dst);
+ bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
+ ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
+ ourbitset->words, src_1->cardinality, src_2->array,
+ src_2->cardinality);
+ if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
+ // need to convert!
+ if(src_1->capacity < ourbitset->cardinality) {
+ array_container_grow(src_1, ourbitset->cardinality, false);
+ }
+
+ bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+ src_1->array, 0);
+ src_1->cardinality = ourbitset->cardinality;
+ *dst = src_1;
+ bitset_container_free(ourbitset);
+ returnval = false; // not going to be a bitset
+ }
+ }
+ return returnval;
+}
+
+
+bool array_array_container_lazy_union(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ int totalCardinality = src_1->cardinality + src_2->cardinality;
+ if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
+ *dst = array_container_create_given_capacity(totalCardinality);
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
+ }
+ *dst = bitset_container_create();
+ bool returnval = true; // expect a bitset
+ if (*dst != NULL) {
+ bitset_container_t *ourbitset = CAST_bitset(*dst);
+ bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
+ bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality);
+ ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
+ }
+ return returnval;
+}
+
+
+bool array_array_container_lazy_inplace_union(
+ array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ int totalCardinality = src_1->cardinality + src_2->cardinality;
+ *dst = NULL;
+ if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
+ if(src_1->capacity < totalCardinality) {
+ *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
+ } else {
+ memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
+ src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
+ src_2->array, src_2->cardinality, src_1->array);
+ return false; // not a bitset
+ }
+ }
+ *dst = bitset_container_create();
+ bool returnval = true; // expect a bitset
+ if (*dst != NULL) {
+ bitset_container_t *ourbitset = CAST_bitset(*dst);
+ bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
+ bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality);
+ ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
+ }
+ return returnval;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_union.c */
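
For illustration, a usage sketch of array_array_container_union() above; it is not part of the amalgamation and assumes the internal array_container_create()/array_container_add()/_free() helpers declared elsewhere in this file. The boolean return value tells the caller which container type *dst ended up holding:

static void demo_array_array_union(void) {
    array_container_t *a = array_container_create();
    array_container_t *b = array_container_create();
    for (uint16_t v = 0; v < 1000; v++) array_container_add(a, v);    /* [0,1000)   */
    for (uint16_t v = 500; v < 1500; v++) array_container_add(b, v);  /* [500,1500) */

    container_t *dst = NULL;
    bool is_bitset = array_array_container_union(a, b, &dst);
    /* The combined input cardinality (2000) is below DEFAULT_MAX_SIZE, so the
     * union is materialized as an array container holding 1500 values and
     * is_bitset is false. */
    if (is_bitset) {
        bitset_container_free(CAST_bitset(dst));
    } else {
        array_container_free(CAST_array(dst));
    }
    array_container_free(a);
    array_container_free(b);
}
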
/* begin file src/containers/mixed_xor.c */
/*
* mixed_xor.c
@@ -17784,7 +13815,7 @@ extern "C" { namespace roaring { namespace internal {
/* Compute the xor of src_1 and src_2 and write the result to
* dst (which has no container initially).
* Result is true iff dst is a bitset */
-static bool array_bitset_container_xor(
+bool array_bitset_container_xor(
const array_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -17808,7 +13839,7 @@ static bool array_bitset_container_xor(
* update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
*/
-static void array_bitset_container_lazy_xor(const array_container_t *src_1,
+void array_bitset_container_lazy_xor(const array_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst) {
if (src_2 != dst) bitset_container_copy(src_2, dst);
@@ -17823,14 +13854,14 @@ static void array_bitset_container_lazy_xor(const array_container_t *src_1,
* result true) or an array container.
*/
-static bool run_bitset_container_xor(
+bool run_bitset_container_xor(
const run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
bitset_container_t *result = bitset_container_create();
bitset_container_copy(src_2, result);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_flip_range(result->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -17851,11 +13882,11 @@ static bool run_bitset_container_xor(
* cardinality would dictate an array container.
*/
-static void run_bitset_container_lazy_xor(const run_container_t *src_1,
+void run_bitset_container_lazy_xor(const run_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst) {
if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_flip_range(dst->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -17867,7 +13898,7 @@ static void run_bitset_container_lazy_xor(const run_container_t *src_1,
* can become any kind of container.
*/
-static int array_run_container_xor(
+int array_run_container_xor(
const array_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -17912,7 +13943,7 @@ static int array_run_container_xor(
* smaller.
*/
-static void array_run_container_lazy_xor(const array_container_t *src_1,
+void array_run_container_lazy_xor(const array_container_t *src_1,
const run_container_t *src_2,
run_container_t *dst) {
run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
@@ -17946,7 +13977,7 @@ static void array_run_container_lazy_xor(const array_container_t *src_1,
* can become any kind of container.
*/
-static int run_run_container_xor(
+int run_run_container_xor(
const run_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -17965,7 +13996,7 @@ static int run_run_container_xor(
*
*/
-static bool array_array_container_xor(
+bool array_array_container_xor(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -17991,7 +14022,7 @@ static bool array_array_container_xor(
return returnval;
}
-static bool array_array_container_lazy_xor(
+bool array_array_container_lazy_xor(
const array_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -18018,7 +14049,7 @@ static bool array_array_container_lazy_xor(
* "dst is a bitset"
*/
-static bool bitset_bitset_container_xor(
+bool bitset_bitset_container_xor(
const bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -18041,7 +14072,7 @@ static bool bitset_bitset_container_xor(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-static bool bitset_array_container_ixor(
+bool bitset_array_container_ixor(
bitset_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -18062,16 +14093,22 @@ static bool bitset_array_container_ixor(
* Anything inplace with a bitset is a good candidate
*/
-static bool bitset_bitset_container_ixor(
+bool bitset_bitset_container_ixor(
bitset_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
- bool ans = bitset_bitset_container_xor(src_1, src_2, dst);
- bitset_container_free(src_1);
- return ans;
+ int card = bitset_container_xor(src_1, src_2, src_1);
+ if (card <= DEFAULT_MAX_SIZE) {
+ *dst = array_container_from_bitset(src_1);
+ bitset_container_free(src_1);
+ return false; // not bitset
+ } else {
+ *dst = src_1;
+ return true;
+ }
}
-static bool array_bitset_container_ixor(
+bool array_bitset_container_ixor(
array_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -18087,7 +14124,7 @@ static bool array_bitset_container_ixor(
* result true) or an array container.
*/
-static bool run_bitset_container_ixor(
+bool run_bitset_container_ixor(
run_container_t *src_1, const bitset_container_t *src_2,
container_t **dst
){
@@ -18096,7 +14133,7 @@ static bool run_bitset_container_ixor(
return ans;
}
-static bool bitset_run_container_ixor(
+bool bitset_run_container_ixor(
bitset_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -18109,7 +14146,7 @@ static bool bitset_run_container_ixor(
* can become any kind of container.
*/
-static int array_run_container_ixor(
+int array_run_container_ixor(
array_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -18118,7 +14155,7 @@ static int array_run_container_ixor(
return ans;
}
-static int run_array_container_ixor(
+int run_array_container_ixor(
run_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -18127,7 +14164,7 @@ static int run_array_container_ixor(
return ans;
}
-static bool array_array_container_ixor(
+bool array_array_container_ixor(
array_container_t *src_1, const array_container_t *src_2,
container_t **dst
){
@@ -18136,7 +14173,7 @@ static bool array_array_container_ixor(
return ans;
}
-static int run_run_container_ixor(
+int run_run_container_ixor(
run_container_t *src_1, const run_container_t *src_2,
container_t **dst
){
@@ -18149,1014 +14186,5455 @@ static int run_run_container_ixor(
} } } // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_xor.c */
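
The reworked bitset_bitset_container_ixor() above re-checks the cardinality threshold after the in-place xor because the xor of two heavily overlapping bitsets can be far smaller than either input, in which case an array container is the better representation. A standalone illustration of that effect (4096 is the array/bitset threshold, DEFAULT_MAX_SIZE, in CRoaring):

#include <stdint.h>
#include <stdio.h>

#define WORDS 1024  /* a bitset container holds 1024 64-bit words (65536 bits) */

int main(void) {
    static uint64_t a[WORDS], b[WORDS];
    for (int i = 0; i < WORDS; i++) { a[i] = ~UINT64_C(0); b[i] = ~UINT64_C(0); }
    b[0] ^= UINT64_C(0xFF);  /* the two containers differ in only 8 positions */

    int card = 0;
    for (int i = 0; i < WORDS; i++) card += __builtin_popcountll(a[i] ^ b[i]);
    printf("xor cardinality: %d\n", card);  /* 8, far below the 4096 threshold */
    return 0;
}
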
-/* begin file src/bitset_util.c */
-#include <assert.h>
-#include <stdint.h>
+/* begin file src/containers/run.c */
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif
-#ifdef CROARING_IS_X64
-static uint8_t lengthTable[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
- 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
- 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
-#endif
+extern inline uint16_t run_container_minimum(const run_container_t *run);
+extern inline uint16_t run_container_maximum(const run_container_t *run);
+extern inline int32_t interleavedBinarySearch(const rle16_t *array,
+ int32_t lenarray, uint16_t ikey);
+extern inline bool run_container_contains(const run_container_t *run,
+ uint16_t pos);
+extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
+extern inline bool run_container_is_full(const run_container_t *run);
+extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
+extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
+extern inline run_container_t *run_container_create_range(uint32_t start,
+ uint32_t stop);
+extern inline int run_container_cardinality(const run_container_t *run);
-#ifdef CROARING_IS_X64
-ALIGNED(32)
-static uint32_t vecDecodeTable[256][8] = {
- {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
- {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
- {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
- {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
- {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
- {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
- {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
- {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
- {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
- {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
- {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
- {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
- {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
- {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
- {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
- {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
- {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
- {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
- {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
- {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
- {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
- {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
- {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
- {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
- {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
- {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
- {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
- {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
- {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
- {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
- {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
- {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
- {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
- {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
- {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
- {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
- {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
- {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
- {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
- {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
- {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
- {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
- {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
- {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
- {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
- {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
- {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
- {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
- {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
- {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
- {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
- {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
- {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
- {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
- {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
- {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
- {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
- {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
- {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
- {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
- {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
- {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
- {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
- {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
- {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
- {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
- {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
- {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
- {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
- {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
- {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
- {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
- {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
- {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
- {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
- {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
- {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
- {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
- {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
- {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
- {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
- {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
- {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
- {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
- {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
- {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
- {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
- {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
- {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
- {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
- {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
- {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
- {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
- {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
- {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
- {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
- {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
- {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
- {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
- {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
- {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
- {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
- {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
- {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
- {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
- {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
- {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
- {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
- {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
- {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
- {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
- {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
- {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
- {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
- {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
- {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
- {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
- {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
- {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
- {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
- {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
- {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
- {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
- {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
- {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
- {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
- {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
- {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
- {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
- {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
- {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
- {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
- {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
- {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
- {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
- {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
- {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
- {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
- {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
- {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
- {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
- {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
- {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
- {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
- {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
- {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
- {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
- {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
- {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
- {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
- {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
- {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
- {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
- {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
- {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
- {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
- {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
- {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
- {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
- {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
- {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
- {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
- {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
- {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
- {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
- {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
- {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
- {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
- {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
- {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
- {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
- {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
- {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
- {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
- {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
- {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
- {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
- {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
- {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
- {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
- {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
- {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
- {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
- {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
- {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
- {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
- {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
- {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
- {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
- {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
- {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
- {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
- {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
- {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
- {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
- {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
- {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
- {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
- {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
- {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
- {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
- {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
- {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
- {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
- {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
- {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
- {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
- {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
- {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
- {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
- {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
- {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
- {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
- {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
- {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
- {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
- {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
- {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
- {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
- {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
- {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
- {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
- {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
- {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
- {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
- {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
- {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
- {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
- {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
- {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
- {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
- {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
- {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
- {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
- {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
- {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
- {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
- {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
- {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
- {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
- {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
- {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
- {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
- {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
- {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
- {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
- {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
- {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
- {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
- {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
- {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
- {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
- {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
- {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
- {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
- {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
-};
-#endif // #ifdef CROARING_IS_X64
+bool run_container_add(run_container_t *run, uint16_t pos) {
+ int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
+ if (index >= 0) return false; // already there
+ index = -index - 2; // points to preceding value, possibly -1
+ if (index >= 0) { // possible match
+ int32_t offset = pos - run->runs[index].value;
+ int32_t le = run->runs[index].length;
+ if (offset <= le) return false; // already there
+ if (offset == le + 1) {
+ // we may need to fuse
+ if (index + 1 < run->n_runs) {
+ if (run->runs[index + 1].value == pos + 1) {
+ // indeed fusion is needed
+ run->runs[index].length = run->runs[index + 1].value +
+ run->runs[index + 1].length -
+ run->runs[index].value;
+ recoverRoomAtIndex(run, (uint16_t)(index + 1));
+ return true;
+ }
+ }
+ run->runs[index].length++;
+ return true;
+ }
+ if (index + 1 < run->n_runs) {
+ // we may need to fuse
+ if (run->runs[index + 1].value == pos + 1) {
+ // indeed fusion is needed
+ run->runs[index + 1].value = pos;
+ run->runs[index + 1].length = run->runs[index + 1].length + 1;
+ return true;
+ }
+ }
+ }
+ if (index == -1) {
+ // we may need to extend the first run
+ if (0 < run->n_runs) {
+ if (run->runs[0].value == pos + 1) {
+ run->runs[0].length++;
+ run->runs[0].value--;
+ return true;
+ }
+ }
+ }
+ makeRoomAtIndex(run, (uint16_t)(index + 1));
+ run->runs[index + 1].value = pos;
+ run->runs[index + 1].length = 0;
+ return true;
+}
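+
+/*
+ * A minimal usage sketch of the run-container API above (internal API, not
+ * part of the public roaring.h interface).  Each rle16_t encodes the
+ * inclusive interval [value, value + length], and run_container_add fuses
+ * adjacent runs:
+ *
+ *   run_container_t *rc = run_container_create();
+ *   run_container_add(rc, 10);   // runs: {[10,10]}
+ *   run_container_add(rc, 12);   // runs: {[10,10], [12,12]}
+ *   run_container_add(rc, 11);   // runs: {[10,12]}  (the two runs are fused)
+ *   run_container_free(rc);
+ */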
-#ifdef CROARING_IS_X64
-// same as vecDecodeTable but in 16 bits
-ALIGNED(32)
-static uint16_t vecDecodeTable_uint16[256][8] = {
- {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
- {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
- {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
- {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
- {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
- {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
- {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
- {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
- {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
- {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
- {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
- {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
- {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
- {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
- {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
- {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
- {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
- {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
- {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
- {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
- {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
- {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
- {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
- {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
- {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
- {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
- {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
- {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
- {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
- {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
- {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
- {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
- {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
- {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
- {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
- {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
- {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
- {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
- {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
- {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
- {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
- {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
- {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
- {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
- {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
- {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
- {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
- {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
- {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
- {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
- {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
- {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
- {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
- {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
- {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
- {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
- {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
- {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
- {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
- {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
- {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
- {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
- {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
- {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
- {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
- {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
- {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
- {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
- {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
- {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
- {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
- {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
- {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
- {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
- {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
- {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
- {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
- {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
- {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
- {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
- {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
- {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
- {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
- {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
- {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
- {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
- {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
- {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
- {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
- {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
- {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
- {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
- {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
- {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
- {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
- {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
- {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
- {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
- {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
- {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
- {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
- {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
- {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
- {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
- {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
- {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
- {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
- {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
- {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
- {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
- {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
- {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
- {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
- {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
- {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
- {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
- {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
- {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
- {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
- {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
- {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
- {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
- {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
- {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
- {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
- {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
- {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
- {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
- {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
- {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
- {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
- {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
- {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
- {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
- {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
- {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
- {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
- {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
- {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
- {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
- {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
- {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
- {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
- {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
- {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
- {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
- {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
- {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
- {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
- {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
- {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
- {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
- {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
- {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
- {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
- {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
- {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
- {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
- {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
- {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
- {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
- {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
- {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
- {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
- {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
- {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
- {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
- {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
- {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
- {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
- {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
- {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
- {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
- {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
- {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
- {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
- {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
- {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
- {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
- {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
- {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
- {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
- {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
- {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
- {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
- {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
- {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
- {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
- {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
- {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
- {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
- {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
- {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
- {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
- {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
- {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
- {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
- {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
- {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
- {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
- {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
- {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
- {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
- {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
- {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
- {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
- {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
- {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
- {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
- {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
- {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
- {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
- {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
- {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
- {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
- {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
- {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
- {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
- {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
- {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
- {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
- {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
- {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
- {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
- {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
- {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
- {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
- {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
- {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
- {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
- {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
- {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
- {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
- {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
- {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
- {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
- {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
- {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
- {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
- {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
- {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
- {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
- {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
- {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
- {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
- {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
- {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
- {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
- {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
- {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
- {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
- {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
- {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
- {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
- {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
- {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
-};
+/* Create a new run container. Return NULL in case of failure. */
+run_container_t *run_container_create_given_capacity(int32_t size) {
+ run_container_t *run;
+ /* Allocate the run container itself. */
+ if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == NULL) {
+ return NULL;
+ }
+ if (size <= 0 ) { // we don't want to rely on malloc(0)
+ run->runs = NULL;
+ } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * size)) == NULL) {
+ roaring_free(run);
+ return NULL;
+ }
+ run->capacity = size;
+ run->n_runs = 0;
+ return run;
+}
-#endif
+int run_container_shrink_to_fit(run_container_t *src) {
+ if (src->n_runs == src->capacity) return 0; // nothing to do
+ int savings = src->capacity - src->n_runs;
+ src->capacity = src->n_runs;
+ rle16_t *oldruns = src->runs;
+ src->runs = (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t));
+ if (src->runs == NULL) roaring_free(oldruns); // should never happen?
+ return savings;
+}
+/* Create a new run container. Return NULL in case of failure. */
+run_container_t *run_container_create(void) {
+ return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
+}
-#ifdef CROARING_IS_X64
-CROARING_TARGET_AVX2
-size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
- uint32_t *out, size_t outcapacity,
- uint32_t base) {
- uint32_t *initout = out;
- __m256i baseVec = _mm256_set1_epi32(base - 1);
- __m256i incVec = _mm256_set1_epi32(64);
- __m256i add8 = _mm256_set1_epi32(8);
- uint32_t *safeout = out + outcapacity;
- size_t i = 0;
- for (; (i < length) && (out + 64 <= safeout); ++i) {
- uint64_t w = words[i];
- if (w == 0) {
- baseVec = _mm256_add_epi32(baseVec, incVec);
+run_container_t *run_container_clone(const run_container_t *src) {
+ run_container_t *run = run_container_create_given_capacity(src->capacity);
+ if (run == NULL) return NULL;
+ run->capacity = src->capacity;
+ run->n_runs = src->n_runs;
+ memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
+ return run;
+}
+
+void run_container_offset(const run_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset) {
+ run_container_t *lo = NULL, *hi = NULL;
+
+ bool split;
+ int lo_cap, hi_cap;
+ int top, pivot;
+
+ top = (1 << 16) - offset;
+ pivot = run_container_index_equalorlarger(c, top);
+
+ if (pivot == -1) {
+ split = false;
+ lo_cap = c->n_runs;
+ hi_cap = 0;
+ } else {
+ split = c->runs[pivot].value <= top;
+ lo_cap = pivot + (split ? 1 : 0);
+ hi_cap = c->n_runs - pivot;
+ }
+
+ if (loc && lo_cap) {
+ lo = run_container_create_given_capacity(lo_cap);
+ memcpy(lo->runs, c->runs, lo_cap*sizeof(rle16_t));
+ lo->n_runs = lo_cap;
+ for (int i = 0; i < lo_cap; ++i) {
+ lo->runs[i].value += offset;
+ }
+ *loc = (container_t*)lo;
+ }
+
+ if (hic && hi_cap) {
+ hi = run_container_create_given_capacity(hi_cap);
+ memcpy(hi->runs, c->runs+pivot, hi_cap*sizeof(rle16_t));
+ hi->n_runs = hi_cap;
+ for (int i = 0; i < hi_cap; ++i) {
+ hi->runs[i].value += offset;
+ }
+ *hic = (container_t*)hi;
+ }
+
+ // Fix the split.
+ if (split) {
+ if (lo != NULL) {
+ // Add the missing run to 'lo', exhausting length.
+ lo->runs[lo->n_runs-1].length = (1 << 16) - lo->runs[lo->n_runs-1].value - 1;
+ }
+
+ if (hi != NULL) {
+ // Fix the first run in 'hi'.
+ hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1;
+ hi->runs[0].value = 0;
+ }
+ }
+}
+
+/* Free memory. */
+void run_container_free(run_container_t *run) {
+ if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
+ roaring_free(run->runs);
+ run->runs = NULL; // pedantic
+ }
+ roaring_free(run);
+}
+
+void run_container_grow(run_container_t *run, int32_t min, bool copy) {
+ int32_t newCapacity =
+ (run->capacity == 0)
+ ? RUN_DEFAULT_INIT_SIZE
+ : run->capacity < 64 ? run->capacity * 2
+ : run->capacity < 1024 ? run->capacity * 3 / 2
+ : run->capacity * 5 / 4;
+ if (newCapacity < min) newCapacity = min;
+ run->capacity = newCapacity;
+ assert(run->capacity >= min);
+ if (copy) {
+ rle16_t *oldruns = run->runs;
+ run->runs =
+ (rle16_t *)roaring_realloc(oldruns, run->capacity * sizeof(rle16_t));
+ if (run->runs == NULL) roaring_free(oldruns);
+ } else {
+ // Jon Strabala reports that some tools complain otherwise
+ if (run->runs != NULL) {
+ roaring_free(run->runs);
+ }
+ run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
+ }
+ // handle the case where realloc fails
+ if (run->runs == NULL) {
+ fprintf(stderr, "could not allocate memory\n");
+ }
+ assert(run->runs != NULL);
+}
+
+/* copy one container into another */
+void run_container_copy(const run_container_t *src, run_container_t *dst) {
+ const int32_t n_runs = src->n_runs;
+ if (src->n_runs > dst->capacity) {
+ run_container_grow(dst, n_runs, false);
+ }
+ dst->n_runs = n_runs;
+ memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void run_container_union(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst) {
+ // TODO: this could be a lot more efficient
+
+ // we start out with inexpensive checks
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ run_container_copy(src_1, dst);
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_2, dst);
+ return;
+ }
+ }
+ const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
+ if (dst->capacity < neededcapacity)
+ run_container_grow(dst, neededcapacity, false);
+ dst->n_runs = 0;
+ int32_t rlepos = 0;
+ int32_t xrlepos = 0;
+
+ rle16_t previousrle;
+ if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
+ previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
+ rlepos++;
+ } else {
+ previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
+ xrlepos++;
+ }
+
+ while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
+ rle16_t newrl;
+ if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
+ newrl = src_1->runs[rlepos];
+ rlepos++;
} else {
- for (int k = 0; k < 4; ++k) {
- uint8_t byteA = (uint8_t)w;
- uint8_t byteB = (uint8_t)(w >> 8);
- w >>= 16;
- __m256i vecA =
- _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]);
- __m256i vecB =
- _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]);
- uint8_t advanceA = lengthTable[byteA];
- uint8_t advanceB = lengthTable[byteB];
- vecA = _mm256_add_epi32(baseVec, vecA);
- baseVec = _mm256_add_epi32(baseVec, add8);
- vecB = _mm256_add_epi32(baseVec, vecB);
- baseVec = _mm256_add_epi32(baseVec, add8);
- _mm256_storeu_si256((__m256i *)out, vecA);
- out += advanceA;
- _mm256_storeu_si256((__m256i *)out, vecB);
- out += advanceB;
+ newrl = src_2->runs[xrlepos];
+ xrlepos++;
+ }
+ run_container_append(dst, newrl, &previousrle);
+ }
+ while (xrlepos < src_2->n_runs) {
+ run_container_append(dst, src_2->runs[xrlepos], &previousrle);
+ xrlepos++;
+ }
+ while (rlepos < src_1->n_runs) {
+ run_container_append(dst, src_1->runs[rlepos], &previousrle);
+ rlepos++;
+ }
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
+ */
+void run_container_union_inplace(run_container_t *src_1,
+ const run_container_t *src_2) {
+ // TODO: this could be a lot more efficient
+
+ // we start out with inexpensive checks
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_2, src_1);
+ return;
+ }
+ }
+ // we move the data to the end of the current array
+ const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
+ const int32_t neededcapacity = maxoutput + src_1->n_runs;
+ if (src_1->capacity < neededcapacity)
+ run_container_grow(src_1, neededcapacity, true);
+ memmove(src_1->runs + maxoutput, src_1->runs,
+ src_1->n_runs * sizeof(rle16_t));
+ rle16_t *inputsrc1 = src_1->runs + maxoutput;
+ const int32_t input1nruns = src_1->n_runs;
+ src_1->n_runs = 0;
+ int32_t rlepos = 0;
+ int32_t xrlepos = 0;
+
+ rle16_t previousrle;
+ if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
+ previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
+ rlepos++;
+ } else {
+ previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
+ xrlepos++;
+ }
+ while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
+ rle16_t newrl;
+ if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
+ newrl = inputsrc1[rlepos];
+ rlepos++;
+ } else {
+ newrl = src_2->runs[xrlepos];
+ xrlepos++;
+ }
+ run_container_append(src_1, newrl, &previousrle);
+ }
+ while (xrlepos < src_2->n_runs) {
+ run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
+ xrlepos++;
+ }
+ while (rlepos < input1nruns) {
+ run_container_append(src_1, inputsrc1[rlepos], &previousrle);
+ rlepos++;
+ }
+}
+
+/* Compute the symmetric difference of `src_1' and `src_2' and write the result
+ * to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void run_container_xor(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst) {
+ // don't bother to convert xor with full range into negation
+ // since negation is implemented similarly
+
+ const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
+ if (dst->capacity < neededcapacity)
+ run_container_grow(dst, neededcapacity, false);
+
+ int32_t pos1 = 0;
+ int32_t pos2 = 0;
+ dst->n_runs = 0;
+
+ while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
+ if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
+ run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
+ src_1->runs[pos1].length);
+ pos1++;
+ } else {
+ run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
+ src_2->runs[pos2].length);
+ pos2++;
+ }
+ }
+ while (pos1 < src_1->n_runs) {
+ run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
+ src_1->runs[pos1].length);
+ pos1++;
+ }
+
+ while (pos2 < src_2->n_runs) {
+ run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
+ src_2->runs[pos2].length);
+ pos2++;
+ }
+}
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+void run_container_intersection(const run_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst) {
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ run_container_copy(src_2, dst);
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_1, dst);
+ return;
+ }
+ }
+ // TODO: this could be a lot more efficient, could use SIMD optimizations
+ const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
+ if (dst->capacity < neededcapacity)
+ run_container_grow(dst, neededcapacity, false);
+ dst->n_runs = 0;
+ int32_t rlepos = 0;
+ int32_t xrlepos = 0;
+ int32_t start = src_1->runs[rlepos].value;
+ int32_t end = start + src_1->runs[rlepos].length + 1;
+ int32_t xstart = src_2->runs[xrlepos].value;
+ int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
+ if (end <= xstart) {
+ ++rlepos;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
}
+ } else if (xend <= start) {
+ ++xrlepos;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else { // they overlap
+ const int32_t lateststart = start > xstart ? start : xstart;
+ int32_t earliestend;
+ if (end == xend) { // improbable
+ earliestend = end;
+ rlepos++;
+ xrlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else if (end < xend) {
+ earliestend = end;
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+
+ } else { // end > xend
+ earliestend = xend;
+ xrlepos++;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ }
+ dst->runs[dst->n_runs].value = (uint16_t)lateststart;
+ dst->runs[dst->n_runs].length =
+ (uint16_t)(earliestend - lateststart - 1);
+ dst->n_runs++;
}
}
- base += i * 64;
- for (; (i < length) && (out < safeout); ++i) {
- uint64_t w = words[i];
- while ((w != 0) && (out < safeout)) {
- uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
- int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
- uint32_t val = r + base;
- memcpy(out, &val,
- sizeof(uint32_t)); // should be compiled as a MOV on x64
- out++;
- w ^= t;
+}
+
+/* Compute the size of the intersection of src_1 and src_2 . */
+int run_container_intersection_cardinality(const run_container_t *src_1,
+ const run_container_t *src_2) {
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ return run_container_cardinality(src_2);
+ }
+ if (if2) {
+ return run_container_cardinality(src_1);
}
- base += 64;
}
- return out - initout;
+ int answer = 0;
+ int32_t rlepos = 0;
+ int32_t xrlepos = 0;
+ int32_t start = src_1->runs[rlepos].value;
+ int32_t end = start + src_1->runs[rlepos].length + 1;
+ int32_t xstart = src_2->runs[xrlepos].value;
+ int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
+ if (end <= xstart) {
+ ++rlepos;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ } else if (xend <= start) {
+ ++xrlepos;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else { // they overlap
+ const int32_t lateststart = start > xstart ? start : xstart;
+ int32_t earliestend;
+ if (end == xend) { // improbable
+ earliestend = end;
+ rlepos++;
+ xrlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else if (end < xend) {
+ earliestend = end;
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+
+ } else { // end > xend
+ earliestend = xend;
+ xrlepos++;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ }
+ answer += earliestend - lateststart;
+ }
+ }
+ return answer;
}
-CROARING_UNTARGET_REGION
-#endif // CROARING_IS_X64
-size_t bitset_extract_setbits(const uint64_t *words, size_t length,
- uint32_t *out, uint32_t base) {
+bool run_container_intersect(const run_container_t *src_1,
+ const run_container_t *src_2) {
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ return !run_container_empty(src_2);
+ }
+ if (if2) {
+ return !run_container_empty(src_1);
+ }
+ }
+ int32_t rlepos = 0;
+ int32_t xrlepos = 0;
+ int32_t start = src_1->runs[rlepos].value;
+ int32_t end = start + src_1->runs[rlepos].length + 1;
+ int32_t xstart = src_2->runs[xrlepos].value;
+ int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
+ if (end <= xstart) {
+ ++rlepos;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ } else if (xend <= start) {
+ ++xrlepos;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else { // they overlap
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/* Compute the difference of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+void run_container_andnot(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst) {
+ // following Java implementation as of June 2016
+
+ if (dst->capacity < src_1->n_runs + src_2->n_runs)
+ run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
+
+ dst->n_runs = 0;
+
+ int rlepos1 = 0;
+ int rlepos2 = 0;
+ int32_t start = src_1->runs[rlepos1].value;
+ int32_t end = start + src_1->runs[rlepos1].length + 1;
+ int32_t start2 = src_2->runs[rlepos2].value;
+ int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
+
+ while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
+ if (end <= start2) {
+ // output the first run
+ dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ start = src_1->runs[rlepos1].value;
+ end = start + src_1->runs[rlepos1].length + 1;
+ }
+ } else if (end2 <= start) {
+            // advance past the second run
+ rlepos2++;
+ if (rlepos2 < src_2->n_runs) {
+ start2 = src_2->runs[rlepos2].value;
+ end2 = start2 + src_2->runs[rlepos2].length + 1;
+ }
+ } else {
+ if (start < start2) {
+ dst->runs[dst->n_runs++] =
+ MAKE_RLE16(start, start2 - start - 1);
+ }
+ if (end2 < end) {
+ start = end2;
+ } else {
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ start = src_1->runs[rlepos1].value;
+ end = start + src_1->runs[rlepos1].length + 1;
+ }
+ }
+ }
+ }
+ if (rlepos1 < src_1->n_runs) {
+ dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
+ sizeof(rle16_t) * (src_1->n_runs - rlepos1));
+ dst->n_runs += src_1->n_runs - rlepos1;
+ }
+ }
+}
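+
+/*
+ * Worked example for the difference above: with src_1 = {[3,8]} and
+ * src_2 = {[5,6]}, the loop first emits [3,4], then clips the remainder of
+ * the first run to start at 7 and flushes it after the loop, so
+ * dst = {[3,4], [7,8]}.
+ */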
+
+ALLOW_UNALIGNED
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+ uint32_t base) {
int outpos = 0;
- size_t i; for (i = 0; i < length; ++i) {
- uint64_t w = words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
- int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
- uint32_t val = r + base;
+ uint32_t *out = (uint32_t *)vout;
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ for (int j = 0; j <= le; ++j) {
+ uint32_t val = run_start + j;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
outpos++;
- w ^= t;
}
- base += 64;
}
return outpos;
}
-size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
- const uint64_t * __restrict__ words2,
- size_t length, uint16_t *out,
- uint16_t base) {
- int outpos = 0;
- size_t i; for (i = 0; i < length; ++i) {
- uint64_t w = words1[i] & words2[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- out[outpos++] = r + base;
- w ^= t;
- }
- base += 64;
+/*
+ * Print this container using printf (useful for debugging).
+ */
+void run_container_printf(const run_container_t *cont) {
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint16_t run_start = cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ printf("[%d,%d]", run_start, run_start + le);
}
- return outpos;
}
-#ifdef CROARING_IS_X64
/*
- * Given a bitset containing "length" 64-bit words, write out the position
- * of all the set bits to "out" as 16-bit integers, values start at "base" (can
- *be set to zero).
- *
- * The "out" pointer should be sufficient to store the actual number of bits
- *set.
- *
- * Returns how many values were actually decoded.
- *
- * This function uses SSE decoding.
+ * Print this container using printf as a comma-separated list of 32-bit
+ * integers starting at base.
*/
-CROARING_TARGET_AVX2
-static size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
- uint16_t *out, size_t outcapacity,
- uint16_t base) {
- uint16_t *initout = out;
- __m128i baseVec = _mm_set1_epi16(base - 1);
- __m128i incVec = _mm_set1_epi16(64);
- __m128i add8 = _mm_set1_epi16(8);
- uint16_t *safeout = out + outcapacity;
- const int numberofbytes = 2; // process two bytes at a time
- size_t i = 0;
- for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) {
- uint64_t w = words[i];
- if (w == 0) {
- baseVec = _mm_add_epi16(baseVec, incVec);
- } else {
- for (int k = 0; k < 4; ++k) {
- uint8_t byteA = (uint8_t)w;
- uint8_t byteB = (uint8_t)(w >> 8);
- w >>= 16;
- __m128i vecA = _mm_load_si128(
- (const __m128i *)vecDecodeTable_uint16[byteA]);
- __m128i vecB = _mm_load_si128(
- (const __m128i *)vecDecodeTable_uint16[byteB]);
- uint8_t advanceA = lengthTable[byteA];
- uint8_t advanceB = lengthTable[byteB];
- vecA = _mm_add_epi16(baseVec, vecA);
- baseVec = _mm_add_epi16(baseVec, add8);
- vecB = _mm_add_epi16(baseVec, vecB);
- baseVec = _mm_add_epi16(baseVec, add8);
- _mm_storeu_si128((__m128i *)out, vecA);
- out += advanceA;
- _mm_storeu_si128((__m128i *)out, vecB);
- out += advanceB;
+void run_container_printf_as_uint32_array(const run_container_t *cont,
+ uint32_t base) {
+ if (cont->n_runs == 0) return;
+ {
+ uint32_t run_start = base + cont->runs[0].value;
+ uint16_t le = cont->runs[0].length;
+ printf("%u", run_start);
+ for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j);
+ }
+ for (int32_t i = 1; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j);
+ }
+}
+
+int32_t run_container_write(const run_container_t *container, char *buf) {
+ uint16_t cast_16 = container->n_runs;
+ memcpy(buf, &cast_16, sizeof(uint16_t));
+ memcpy(buf + sizeof(uint16_t), container->runs,
+ container->n_runs * sizeof(rle16_t));
+ return run_container_size_in_bytes(container);
+}
+
+int32_t run_container_read(int32_t cardinality, run_container_t *container,
+ const char *buf) {
+ (void)cardinality;
+ uint16_t cast_16;
+ memcpy(&cast_16, buf, sizeof(uint16_t));
+ container->n_runs = cast_16;
+ if (container->n_runs > container->capacity)
+ run_container_grow(container, container->n_runs, false);
+ if(container->n_runs > 0) {
+ memcpy(container->runs, buf + sizeof(uint16_t),
+ container->n_runs * sizeof(rle16_t));
+ }
+ return run_container_size_in_bytes(container);
+}
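+
+/*
+ * Serialized layout sketch for the two functions above: a 16-bit run count
+ * followed by n_runs (value, length) pairs of 16-bit fields, memcpy'd in
+ * native byte order.  For example, a container holding {3,4,5,10}, i.e.
+ * runs {[3,5], [10,10]}, occupies 10 bytes; on a little-endian machine:
+ *
+ *   02 00 | 03 00 02 00 | 0a 00 00 00
+ */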
+
+bool run_container_iterate(const run_container_t *cont, uint32_t base,
+ roaring_iterator iterator, void *ptr) {
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+
+ for (int j = 0; j <= le; ++j)
+ if (!iterator(run_start + j, ptr)) return false;
+ }
+ return true;
+}
+
+bool run_container_iterate64(const run_container_t *cont, uint32_t base,
+ roaring_iterator64 iterator, uint64_t high_bits,
+ void *ptr) {
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+
+ for (int j = 0; j <= le; ++j)
+ if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
+ return false;
+ }
+ return true;
+}
+
+bool run_container_is_subset(const run_container_t *container1,
+ const run_container_t *container2) {
+ int i1 = 0, i2 = 0;
+ while (i1 < container1->n_runs && i2 < container2->n_runs) {
+ int start1 = container1->runs[i1].value;
+ int stop1 = start1 + container1->runs[i1].length;
+ int start2 = container2->runs[i2].value;
+ int stop2 = start2 + container2->runs[i2].length;
+ if (start1 < start2) {
+ return false;
+ } else { // start1 >= start2
+ if (stop1 < stop2) {
+ i1++;
+ } else if (stop1 == stop2) {
+ i1++;
+ i2++;
+ } else { // stop1 > stop2
+ i2++;
}
}
}
- base += (uint16_t)(i * 64);
- for (; (i < length) && (out < safeout); ++i) {
- uint64_t w = words[i];
- while ((w != 0) && (out < safeout)) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- *out = r + base;
- out++;
- w ^= t;
+ if (i1 == container1->n_runs) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// TODO: write a smart_append_exclusive version to match the overloaded
+// one-parameter Java version (or is it even used?)
+
+// follows the Java implementation closely
+// length is the rle-value, i.e., the run [10,12) uses a length value of 1.
+void run_container_smart_append_exclusive(run_container_t *src,
+ const uint16_t start,
+ const uint16_t length) {
+ int old_end;
+ rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
+ rle16_t *appended_last_run = src->runs + src->n_runs;
+
+ if (!src->n_runs ||
+ (start > (old_end = last_run->value + last_run->length + 1))) {
+ *appended_last_run = MAKE_RLE16(start, length);
+ src->n_runs++;
+ return;
+ }
+ if (old_end == start) {
+ // we merge
+ last_run->length += (length + 1);
+ return;
+ }
+ int new_end = start + length + 1;
+
+ if (start == last_run->value) {
+ // wipe out previous
+ if (new_end < old_end) {
+ *last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
+ return;
+ } else if (new_end > old_end) {
+ *last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
+ return;
+ } else {
+ src->n_runs--;
+ return;
}
- base += 64;
}
- return out - initout;
+ last_run->length = start - last_run->value - 1;
+ if (new_end < old_end) {
+ *appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
+ src->n_runs++;
+ } else if (new_end > old_end) {
+ *appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
+ src->n_runs++;
+ }
}
+
+bool run_container_select(const run_container_t *container,
+ uint32_t *start_rank, uint32_t rank,
+ uint32_t *element) {
+ for (int i = 0; i < container->n_runs; i++) {
+ uint16_t length = container->runs[i].length;
+ if (rank <= *start_rank + length) {
+ uint16_t value = container->runs[i].value;
+ *element = value + rank - (*start_rank);
+ return true;
+ } else
+ *start_rank += length + 1;
+ }
+ return false;
+}
+
+int run_container_rank(const run_container_t *container, uint16_t x) {
+ int sum = 0;
+ uint32_t x32 = x;
+ for (int i = 0; i < container->n_runs; i++) {
+ uint32_t startpoint = container->runs[i].value;
+ uint32_t length = container->runs[i].length;
+ uint32_t endpoint = length + startpoint;
+ if (x <= endpoint) {
+ if (x < startpoint) break;
+ return sum + (x32 - startpoint) + 1;
+ } else {
+ sum += length + 1;
+ }
+ }
+ return sum;
+}
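+
+/*
+ * Example for run_container_rank: with runs {[3,5], [10,10]} the rank of 4
+ * is 0 + (4 - 3) + 1 = 2 (two stored values are <= 4), while the rank of 7
+ * is 3, since the loop accumulates the full first run and then breaks.
+ */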
+
+#ifdef CROARING_IS_X64
+
+CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
+/* Get the cardinality of `run'. Requires an actual computation. */
+static inline int _avx2_run_container_cardinality(const run_container_t *run) {
+ const int32_t n_runs = run->n_runs;
+ const rle16_t *runs = run->runs;
+
+ /* by initializing with n_runs, we omit counting the +1 for each pair. */
+ int sum = n_runs;
+ int32_t k = 0;
+ const int32_t step = sizeof(__m256i) / sizeof(rle16_t);
+ if (n_runs > step) {
+ __m256i total = _mm256_setzero_si256();
+ for (; k + step <= n_runs; k += step) {
+ __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k));
+ __m256i justlengths = _mm256_srli_epi32(ymm1, 16);
+ total = _mm256_add_epi32(total, justlengths);
+ }
+ // a store might be faster than extract?
+ uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
+ _mm256_storeu_si256((__m256i *)buffer, total);
+ sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+ (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+ }
+ for (; k < n_runs; ++k) {
+ sum += runs[k].length;
+ }
+
+ return sum;
+}
+
CROARING_UNTARGET_REGION
+
+/* Get the cardinality of `run'. Requires an actual computation. */
+static inline int _scalar_run_container_cardinality(const run_container_t *run) {
+ const int32_t n_runs = run->n_runs;
+ const rle16_t *runs = run->runs;
+
+ /* by initializing with n_runs, we omit counting the +1 for each pair. */
+ int sum = n_runs;
+ for (int k = 0; k < n_runs; ++k) {
+ sum += runs[k].length;
+ }
+
+ return sum;
+}
+
+int run_container_cardinality(const run_container_t *run) {
+ if( croaring_avx2() ) {
+ return _avx2_run_container_cardinality(run);
+ } else {
+ return _scalar_run_container_cardinality(run);
+ }
+}
+#else
+
+/* Get the cardinality of `run'. Requires an actual computation. */
+int run_container_cardinality(const run_container_t *run) {
+ const int32_t n_runs = run->n_runs;
+ const rle16_t *runs = run->runs;
+
+ /* by initializing with n_runs, we omit counting the +1 for each pair. */
+ int sum = n_runs;
+ for (int k = 0; k < n_runs; ++k) {
+ sum += runs[k].length;
+ }
+
+ return sum;
+}
#endif
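+
+/*
+ * Example: for runs {[3,5], [10,10]} the cardinality is
+ * n_runs + sum(lengths) = 2 + (2 + 0) = 4, i.e. the values {3,4,5,10};
+ * starting the sum at n_runs accounts for the implicit +1 of each run.
+ */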
-/*
- * Given a bitset containing "length" 64-bit words, write out the position
- * of all the set bits to "out", values start at "base" (can be set to zero).
- *
- * The "out" pointer should be sufficient to store the actual number of bits
- *set.
- *
- * Returns how many values were actually decoded.
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/run.c */
+/* begin file src/memory.c */
+#include <stdlib.h>
+
+// without the following, we get lots of warnings about posix_memalign
+#ifndef __cplusplus
+extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
+#endif //__cplusplus // C++ does not have a well defined signature
+
+// portable version of posix_memalign
+static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
+ void *p;
+#ifdef _MSC_VER
+ p = _aligned_malloc(size, alignment);
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ p = __mingw_aligned_malloc(size, alignment);
+#else
+    // somehow, if this is used before including "x86intrin.h", it creates an
+    // implicit-declaration warning.
+ if (posix_memalign(&p, alignment, size) != 0) return NULL;
+#endif
+ return p;
+}
+
+static void roaring_bitmap_aligned_free(void *memblock) {
+#ifdef _MSC_VER
+ _aligned_free(memblock);
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ __mingw_aligned_free(memblock);
+#else
+ free(memblock);
+#endif
+}
+
+static roaring_memory_t global_memory_hook = {
+ .malloc = malloc,
+ .realloc = realloc,
+ .calloc = calloc,
+ .free = free,
+ .aligned_malloc = roaring_bitmap_aligned_malloc,
+ .aligned_free = roaring_bitmap_aligned_free,
+};
+
+void roaring_init_memory_hook(roaring_memory_t memory_hook) {
+ global_memory_hook = memory_hook;
+}
+
+void* roaring_malloc(size_t n) {
+ return global_memory_hook.malloc(n);
+}
+
+void* roaring_realloc(void* p, size_t new_sz) {
+ return global_memory_hook.realloc(p, new_sz);
+}
+
+void* roaring_calloc(size_t n_elements, size_t element_size) {
+ return global_memory_hook.calloc(n_elements, element_size);
+}
+
+void roaring_free(void* p) {
+ global_memory_hook.free(p);
+}
+
+void* roaring_aligned_malloc(size_t alignment, size_t size) {
+ return global_memory_hook.aligned_malloc(alignment, size);
+}
+
+void roaring_aligned_free(void* p) {
+ global_memory_hook.aligned_free(p);
+}
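+
+/*
+ * Sketch of overriding the global memory hook above with custom allocators
+ * (my_malloc, my_free, etc. are placeholders for user-supplied functions with
+ * the usual libc-style signatures; aligned_malloc takes (alignment, size)):
+ *
+ *   roaring_memory_t hook = {
+ *       .malloc = my_malloc,
+ *       .realloc = my_realloc,
+ *       .calloc = my_calloc,
+ *       .free = my_free,
+ *       .aligned_malloc = my_aligned_malloc,
+ *       .aligned_free = my_aligned_free,
+ *   };
+ *   roaring_init_memory_hook(hook);  // ideally before any bitmap is allocated
+ */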
+/* end file src/memory.c */
+/* begin file src/roaring.c */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" { namespace roaring { namespace api {
+#endif
+
+#define CROARING_SERIALIZATION_ARRAY_UINT32 1
+#define CROARING_SERIALIZATION_CONTAINER 2
+
+extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
+extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
+
+static inline bool is_cow(const roaring_bitmap_t *r) {
+ return r->high_low_container.flags & ROARING_FLAG_COW;
+}
+static inline bool is_frozen(const roaring_bitmap_t *r) {
+ return r->high_low_container.flags & ROARING_FLAG_FROZEN;
+}
+
+// This is like roaring_bitmap_add, but it populates pointer arguments in such
+// a way that we can recover the container touched, which, in turn, can be used
+// to accelerate some functions (when you repeatedly need to add to the same
+// container).
+static inline container_t *containerptr_roaring_bitmap_add(
+ roaring_bitmap_t *r, uint32_t val,
+ uint8_t *type, int *index
+){
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint16_t hb = val >> 16;
+ const int i = ra_get_index(ra, hb);
+ if (i >= 0) {
+ ra_unshare_container_at_index(ra, i);
+ container_t *c = ra_get_container_at_index(ra, i, type);
+ uint8_t new_type = *type;
+ container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
+ *index = i;
+ if (c2 != c) {
+ container_free(c, *type);
+ ra_set_container_at_index(ra, i, c2, new_type);
+ *type = new_type;
+ return c2;
+ } else {
+ return c;
+ }
+ } else {
+ array_container_t *new_ac = array_container_create();
+ container_t *c = container_add(new_ac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, type);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
+ *index = -i - 1;
+ return c;
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (!ans) {
+ return NULL;
+ }
+ bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
+ return ans;
+}
+
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
+ return ra_init_with_capacity(&r->high_low_container, cap);
+}
+
+static inline void add_bulk_impl(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val) {
+ uint16_t key = val >> 16;
+ if (context->container == NULL || context->key != key) {
+ uint8_t typecode;
+ int idx;
+ context->container = containerptr_roaring_bitmap_add(
+ r, val, &typecode, &idx);
+ context->typecode = typecode;
+ context->idx = idx;
+ context->key = key;
+ } else {
+        // No need to seek the container: it is already at hand, so we can do
+        // the insertion directly, bypassing the roaring_bitmap_add call.
+ uint8_t new_typecode;
+ container_t *container2 = container_add(
+ context->container, val & 0xFFFF, context->typecode, &new_typecode);
+ if (container2 != context->container) {
+ // rare instance when we need to change the container type
+ container_free(context->container, context->typecode);
+ ra_set_container_at_index(&r->high_low_container, context->idx,
+ container2, new_typecode);
+ context->typecode = new_typecode;
+ context->container = container2;
+ }
+ }
+}
+
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals) {
+ uint32_t val;
+ const uint32_t *start = vals;
+ const uint32_t *end = vals + n_args;
+ const uint32_t *current_val = start;
+
+ if (n_args == 0) {
+ return;
+ }
+
+ uint8_t typecode;
+ int idx;
+ container_t *container;
+ val = *current_val;
+ container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
+ roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode};
+
+ for (; current_val != end; current_val++) {
+ memcpy(&val, current_val, sizeof(val));
+ add_bulk_impl(r, &context, val);
+ }
+}
+
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context, uint32_t val) {
+ add_bulk_impl(r, context, val);
+}
+
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val)
+{
+ uint16_t key = val >> 16;
+ if (context->container == NULL || context->key != key) {
+ int32_t start_idx = -1;
+ if (context->container != NULL && context->key < key) {
+ start_idx = context->idx;
+ }
+ int idx = ra_advance_until(&r->high_low_container, key, start_idx);
+ if (idx == ra_get_size(&r->high_low_container)) {
+ return false;
+ }
+ uint8_t typecode;
+ context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode);
+ context->typecode = typecode;
+ context->idx = idx;
+ context->key = ra_get_key_at_index(&r->high_low_container, idx);
+        // ra_advance_until finds the next key >= the target; if it lands on a later key, the value cannot be present.
+ if (context->key != key) {
+ return false;
+ }
+ }
+ // context is now set up
+ return container_contains(context->container, val & 0xFFFF, context->typecode);
+}
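+
+/*
+ * Usage sketch for the bulk API above (the same pattern roaring_bitmap_of
+ * uses below): a zero-initialized context caches the last container touched,
+ * so consecutive values sharing the same upper 16 bits skip the container
+ * lookup.
+ *
+ *   roaring_bitmap_t *r = roaring_bitmap_create();
+ *   roaring_bulk_context_t context;
+ *   memset(&context, 0, sizeof(context));
+ *   for (uint32_t v = 0; v < 1000; v++) {
+ *       roaring_bitmap_add_bulk(r, &context, v);
+ *   }
+ *   // roaring_bitmap_contains_bulk follows the same pattern with its own context
+ */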
+
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ roaring_bitmap_add_many(answer, n_args, vals);
+ return answer;
+}
+
+roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
+ // todo: could be greatly optimized but we do not expect this call to ever
+ // include long lists
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ roaring_bulk_context_t context;
+ va_list ap;
+
+ memset(&context, 0, sizeof(context));
+ va_start(ap, n_args);
+ for (size_t i = 0; i < n_args; i++) {
+ uint32_t val = va_arg(ap, uint32_t);
+ roaring_bitmap_add_bulk(answer, &context, val);
+ }
+ va_end(ap);
+ return answer;
+}
+
+static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
+ return (a < b) ? a : b;
+}
+
+static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
+ return (a < b) ? a : b;
+}
+
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+ uint32_t step) {
+ if(max >= UINT64_C(0x100000000)) {
+ max = UINT64_C(0x100000000);
+ }
+ if (step == 0) return NULL;
+ if (max <= min) return NULL;
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ if (step >= (1 << 16)) {
+ for (uint32_t value = (uint32_t)min; value < max; value += step) {
+ roaring_bitmap_add(answer, value);
+ }
+ return answer;
+ }
+ uint64_t min_tmp = min;
+ do {
+ uint32_t key = (uint32_t)min_tmp >> 16;
+ uint32_t container_min = min_tmp & 0xFFFF;
+ uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
+ uint8_t type;
+ container_t *container = container_from_range(&type, container_min,
+ container_max, (uint16_t)step);
+ ra_append(&answer->high_low_container, key, container, type);
+ uint32_t gap = container_max - container_min + step - 1;
+ min_tmp += gap - (gap % step);
+ } while (min_tmp < max);
+ // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
+ return answer;
+}
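+
+/*
+ * Example: roaring_bitmap_from_range(0, 100, 2) yields the 50 even values
+ * 0, 2, ..., 98 (max is exclusive), matching the cardinality formula
+ * ((uint64_t)max - min + step - 1) / step noted above.
+ */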
+
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+ if (min > max) {
+ return;
+ }
+
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint32_t min_key = min >> 16;
+ uint32_t max_key = max >> 16;
+
+ int32_t num_required_containers = max_key - min_key + 1;
+ int32_t suffix_length = count_greater(ra->keys, ra->size, max_key);
+ int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length,
+ min_key);
+ int32_t common_length = ra->size - prefix_length - suffix_length;
+
+ if (num_required_containers > common_length) {
+ ra_shift_tail(ra, suffix_length,
+ num_required_containers - common_length);
+ }
+
+ int32_t src = prefix_length + common_length - 1;
+ int32_t dst = ra->size - suffix_length - 1;
+ for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
+ uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
+ uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
+ container_t* new_container;
+ uint8_t new_type;
+
+ if (src >= 0 && ra->keys[src] == key) {
+ ra_unshare_container_at_index(ra, src);
+ new_container = container_add_range(ra->containers[src],
+ ra->typecodes[src],
+ container_min, container_max,
+ &new_type);
+ if (new_container != ra->containers[src]) {
+ container_free(ra->containers[src],
+ ra->typecodes[src]);
+ }
+ src--;
+ } else {
+ new_container = container_from_range(&new_type, container_min,
+ container_max+1, 1);
+ }
+ ra_replace_key_and_container_at_index(ra, dst, key, new_container,
+ new_type);
+ dst--;
+ }
+}
+
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+ if (min > max) {
+ return;
+ }
+
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint32_t min_key = min >> 16;
+ uint32_t max_key = max >> 16;
+
+ int32_t src = count_less(ra->keys, ra->size, min_key);
+ int32_t dst = src;
+ while (src < ra->size && ra->keys[src] <= max_key) {
+ uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
+ uint32_t container_max = (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
+ ra_unshare_container_at_index(ra, src);
+ container_t *new_container;
+ uint8_t new_type;
+ new_container = container_remove_range(ra->containers[src],
+ ra->typecodes[src],
+ container_min, container_max,
+ &new_type);
+ if (new_container != ra->containers[src]) {
+ container_free(ra->containers[src],
+ ra->typecodes[src]);
+ }
+ if (new_container) {
+ ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
+ new_container, new_type);
+ dst++;
+ }
+ src++;
+ }
+ if (src > dst) {
+ ra_shift_tail(ra, ra->size - src, dst - src);
+ }
+}
+
+extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+
+void roaring_bitmap_printf(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ printf("{");
+ for (int i = 0; i < ra->size; ++i) {
+ container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+
+ if (i + 1 < ra->size) {
+ printf(",");
+ }
+ }
+ printf("}");
+}
+
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ printf("{");
+ for (int i = 0; i < ra->size; ++i) {
+ printf("%d: %s (%d)", ra->keys[i],
+ get_full_container_name(ra->containers[i], ra->typecodes[i]),
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]));
+ if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
+ printf(
+ "(shared count = %" PRIu32 " )",
+ CAST_shared(ra->containers[i])->counter);
+ }
+
+ if (i + 1 < ra->size) {
+ printf(", ");
+ }
+ }
+ printf("}");
+}
+
+typedef struct min_max_sum_s {
+ uint32_t min;
+ uint32_t max;
+ uint64_t sum;
+} min_max_sum_t;
+
+static bool min_max_sum_fnc(uint32_t value, void *param) {
+ min_max_sum_t *mms = (min_max_sum_t *)param;
+ if (value > mms->max) mms->max = value;
+ if (value < mms->min) mms->min = value;
+ mms->sum += value;
+ return true; // we always process all data points
+}
+
+/**
+* (For advanced users.)
+* Collect statistics about the bitmap
+*/
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+ roaring_statistics_t *stat) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ memset(stat, 0, sizeof(*stat));
+ stat->n_containers = ra->size;
+ stat->cardinality = roaring_bitmap_get_cardinality(r);
+ min_max_sum_t mms;
+ mms.min = UINT32_C(0xFFFFFFFF);
+ mms.max = UINT32_C(0);
+ mms.sum = 0;
+ roaring_iterate(r, &min_max_sum_fnc, &mms);
+ stat->min_value = mms.min;
+ stat->max_value = mms.max;
+ stat->sum_value = mms.sum;
+
+ for (int i = 0; i < ra->size; ++i) {
+ uint8_t truetype =
+ get_container_type(ra->containers[i], ra->typecodes[i]);
+ uint32_t card =
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]);
+ uint32_t sbytes =
+ container_size_in_bytes(ra->containers[i], ra->typecodes[i]);
+ switch (truetype) {
+ case BITSET_CONTAINER_TYPE:
+ stat->n_bitset_containers++;
+ stat->n_values_bitset_containers += card;
+ stat->n_bytes_bitset_containers += sbytes;
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ stat->n_array_containers++;
+ stat->n_values_array_containers += card;
+ stat->n_bytes_array_containers += sbytes;
+ break;
+ case RUN_CONTAINER_TYPE:
+ stat->n_run_containers++;
+ stat->n_values_run_containers += card;
+ stat->n_bytes_run_containers += sbytes;
+ break;
+ default:
+ assert(false);
+ __builtin_unreachable();
+ }
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (!ans) {
+ return NULL;
+ }
+ if (!ra_init_with_capacity( // allocation of list of containers can fail
+ &ans->high_low_container, r->high_low_container.size)
+ ){
+ roaring_free(ans);
+ return NULL;
+ }
+ if (!ra_overwrite( // memory allocation of individual containers may fail
+ &r->high_low_container, &ans->high_low_container, is_cow(r))
+ ){
+ roaring_bitmap_free(ans); // overwrite should leave in freeable state
+ return NULL;
+ }
+ roaring_bitmap_set_copy_on_write(ans, is_cow(r));
+ return ans;
+}
+
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+ const roaring_bitmap_t *src) {
+ roaring_bitmap_set_copy_on_write(dest, is_cow(src));
+ return ra_overwrite(&src->high_low_container, &dest->high_low_container,
+ is_cow(src));
+}
+
+void roaring_bitmap_free(const roaring_bitmap_t *r) {
+ if (!is_frozen(r)) {
+ ra_clear((roaring_array_t*)&r->high_low_container);
+ }
+ roaring_free((roaring_bitmap_t*)r);
+}
+
+void roaring_bitmap_clear(roaring_bitmap_t *r) {
+ ra_reset(&r->high_low_container);
+}
+
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
+ roaring_array_t *ra = &r->high_low_container;
+
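+ // the upper 16 bits of val select the container; the lower 16 bits are the value stored inside it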
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(ra, hb);
+ uint8_t typecode;
+ if (i >= 0) {
+ ra_unshare_container_at_index(ra, i);
+ container_t *container =
+ ra_get_container_at_index(ra, i, &typecode);
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_add(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+ } else {
+ array_container_t *newac = array_container_create();
+ container_t *container = container_add(newac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, &typecode);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
+ container, typecode);
+ }
+}
+
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ bool result = false;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+
+ const int oldCardinality =
+ container_get_cardinality(container, typecode);
+
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_add(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ result = true;
+ } else {
+ const int newCardinality =
+ container_get_cardinality(container, newtypecode);
+
+ result = oldCardinality != newCardinality;
+ }
+ } else {
+ array_container_t *newac = array_container_create();
+ container_t *container = container_add(newac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, &typecode);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
+ container, typecode);
+ result = true;
+ }
+
+ return result;
+}
+
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_remove(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+ if (container_get_cardinality(container2, newtypecode) != 0) {
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ } else {
+ ra_remove_at_index_and_free(&r->high_low_container, i);
+ }
+ }
+}
+
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ bool result = false;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+
+ const int oldCardinality =
+ container_get_cardinality(container, typecode);
+
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_remove(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+
+ const int newCardinality =
+ container_get_cardinality(container2, newtypecode);
+
+ if (newCardinality != 0) {
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ } else {
+ ra_remove_at_index_and_free(&r->high_low_container, i);
+ }
+
+ result = oldCardinality != newCardinality;
+ }
+ return result;
+}
+
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals) {
+ if (n_args == 0 || r->high_low_container.size == 0) {
+ return;
+ }
+ int32_t pos = -1; // position of the container used in the previous iteration
+ for (size_t i = 0; i < n_args; i++) {
+ uint16_t key = (uint16_t)(vals[i] >> 16);
+ if (pos < 0 || key != r->high_low_container.keys[pos]) {
+ pos = ra_get_index(&r->high_low_container, key);
+ }
+ if (pos >= 0) {
+ uint8_t new_typecode;
+ container_t *new_container;
+ new_container = container_remove(r->high_low_container.containers[pos],
+ vals[i] & 0xffff,
+ r->high_low_container.typecodes[pos],
+ &new_typecode);
+ if (new_container != r->high_low_container.containers[pos]) {
+ container_free(r->high_low_container.containers[pos],
+ r->high_low_container.typecodes[pos]);
+ ra_replace_key_and_container_at_index(&r->high_low_container,
+ pos, key, new_container,
+ new_typecode);
+ }
+ if (!container_nonzero_cardinality(new_container, new_typecode)) {
+ container_free(new_container, new_typecode);
+ ra_remove_at_index(&r->high_low_container, pos);
+ pos = -1;
+ }
+ }
+ }
+}
+
+// there should be some SIMD optimizations possible here
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
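+ // the intersection can have at most min(length1, length2) containers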
+ uint32_t neededcap = length1 > length2 ? length2 : length1;
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_and(c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type); // otherwise: memory leak!
+ }
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer;
+}
+
+/**
+ * Compute the union of 'number' bitmaps.
*/
-static size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
- uint16_t *out, uint16_t base) {
- int outpos = 0;
- size_t i; for (i = 0; i < length; ++i) {
- uint64_t w = words[i];
- while (w != 0) {
- uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- out[outpos++] = r + base;
- w ^= t;
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) {
+ return roaring_bitmap_create();
+ }
+ if (number == 1) {
+ return roaring_bitmap_copy(x[0]);
+ }
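+ // fold the remaining bitmaps in lazily, then repair cardinalities and container types once at the end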
+ roaring_bitmap_t *answer =
+ roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
+ for (size_t i = 2; i < number; i++) {
+ roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
+ }
+ roaring_bitmap_repair_after_lazy(answer);
+ return answer;
+}
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) {
+ return roaring_bitmap_create();
+ }
+ if (number == 1) {
+ return roaring_bitmap_copy(x[0]);
+ }
+ roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
+ for (size_t i = 2; i < number; i++) {
+ roaring_bitmap_lazy_xor_inplace(answer, x[i]);
+ }
+ roaring_bitmap_repair_after_lazy(answer);
+ return answer;
+}
+
+// inplace and (modifies its first argument).
+void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ if (x1 == x2) return;
+ int pos1 = 0, pos2 = 0, intersection_size = 0;
+ const int length1 = ra_get_size(&x1->high_low_container);
+ const int length2 = ra_get_size(&x2->high_low_container);
+
+ // any skipped-over or newly emptied containers in x1
+ // have to be freed.
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2, result_type;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_and(c1, type1, c2, type2, &result_type)
+ : container_iand(c1, type1, c2, type2, &result_type);
+
+ if (c != c1) { // in this instance a new container was created, and
+ // we need to free the old one
+ container_free(c1, type1);
+ }
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c,
+ result_type);
+ intersection_size++;
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) {
+ pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
}
- base += 64;
}
- return outpos;
+
+ // if we ended early because x2 ran out, then all remaining in x1 should be
+ // freed
+ while (pos1 < length1) {
+ container_free(x1->high_low_container.containers[pos1],
+ x1->high_low_container.typecodes[pos1]);
+ ++pos1;
+ }
+
+ // all containers after this have either been copied or freed
+ ra_downsize(&x1->high_low_container, intersection_size);
}
-#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64)
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_or(c1, type1, c2, type2, &result_type);
-static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
- uint64_t offset, load, pos;
- uint64_t shift = 6;
- const uint16_t *end = list + length;
- if (!length) return card;
- // TODO: could unroll for performance, see bitset_set_list
- // bts is not available as an intrinsic in GCC
- __asm volatile(
- "1:\n"
- "movzwq (%[list]), %[pos]\n"
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)\n"
- "sbb $-1, %[card]\n"
- "add $2, %[list]\n"
- "cmp %[list], %[end]\n"
- "jnz 1b"
- : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
- [pos] "=&r"(pos), [offset] "=&r"(offset)
- : [end] "r"(end), [words] "r"(words), [shift] "r"(shift));
- return card;
+ // since we assume that the initial containers are non-empty,
+ // the result here can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ // c1 = container_clone(c1, type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // c2 = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
}
-static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- uint64_t pos;
- const uint16_t *end = list + length;
+// inplace or (modifies its first argument).
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
- uint64_t shift = 6;
- uint64_t offset;
- uint64_t load;
- for (; list + 3 < end; list += 4) {
- pos = list[0];
- __asm volatile(
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)"
- : [load] "=&r"(load), [offset] "=&r"(offset)
- : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
- pos = list[1];
- __asm volatile(
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)"
- : [load] "=&r"(load), [offset] "=&r"(offset)
- : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
- pos = list[2];
- __asm volatile(
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)"
- : [load] "=&r"(load), [offset] "=&r"(offset)
- : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
- pos = list[3];
- __asm volatile(
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)"
- : [load] "=&r"(load), [offset] "=&r"(offset)
- : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
}
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
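+ // a full container already holds every possible value, so the union is a no-op for this key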
+ if (!container_is_full(c1, type1)) {
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_or(c1, type1, c2, type2, &result_type)
+ : container_ior(c1, type1, c2, type2, &result_type);
- while (list != end) {
- pos = list[0];
- __asm volatile(
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "bts %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)"
- : [load] "=&r"(load), [offset] "=&r"(offset)
- : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
- list++;
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+
+ // container_t *c2_clone = container_clone(c2, type2);
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
}
}
-static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
- uint64_t offset, load, pos;
- uint64_t shift = 6;
- const uint16_t *end = list + length;
- if (!length) return card;
- // btr is not available as an intrinsic in GCC
- __asm volatile(
- "1:\n"
- "movzwq (%[list]), %[pos]\n"
- "shrx %[shift], %[pos], %[offset]\n"
- "mov (%[words],%[offset],8), %[load]\n"
- "btr %[pos], %[load]\n"
- "mov %[load], (%[words],%[offset],8)\n"
- "sbb $0, %[card]\n"
- "add $2, %[list]\n"
- "cmp %[list], %[end]\n"
- "jnz 1b"
- : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
- [pos] "=&r"(pos), [offset] "=&r"(offset)
- : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)
- :
- /* clobbers */ "memory");
- return card;
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_xor(c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
}
-static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *(const uint16_t *)list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load & ~(UINT64_C(1) << index);
- card -= (load ^ newload) >> index;
- words[offset] = newload;
- list++;
+// inplace xor (modifies its first argument).
+
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+
+ // XOR can have new containers inserted from x2, but can also
+ // lose containers when x1 and x2 are nonempty and identical.
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // so release
+ }
+ else {
+ c = container_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2));
+ return empty_bitmap;
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = 0;
+ uint16_t s2 = 0;
+ while (true) {
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_andnot(c1, type1, c2, type2,
+ &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ } else if (s1 < s2) { // s1 < s2
+ const int next_pos1 =
+ ra_advance_until(&x1->high_low_container, s2, pos1);
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, next_pos1,
+ is_cow(x1));
+ // TODO : perhaps some of the copy_on_write should be based on
+ // answer rather than x1 (more stringent?). Many similar cases
+ pos1 = next_pos1;
+ if (pos1 == length1) break;
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ if (pos2 == length2) break;
+ }
+ }
+ if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+// inplace andnot (modifies its first argument).
+
+void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+ int intersection_size = 0;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_clear(x1);
+ return;
+ }
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_andnot(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_iandnot(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size++, s1,
+ c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
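+ // key present only in x1: keep its container, sliding it left if earlier slots were vacated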
+ if (pos1 != intersection_size) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c1,
+ type1);
+ }
+ intersection_size++;
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+
+ if (pos1 < length1) {
+ // all containers between intersection_size and
+ // pos1 are junk. However, they have either been moved
+ // (thus still referenced) or involved in an iandnot
+ // that will clean up all containers that could not be reused.
+ // Thus we should not free the junk containers between
+ // intersection_size and pos1.
+ if (pos1 > intersection_size) {
+ // left slide of remaining items
+ ra_copy_range(&x1->high_low_container, pos1, length1,
+ intersection_size);
+ }
+ // else current placement is fine
+ intersection_size += (length1 - pos1);
}
+ ra_downsize(&x1->high_low_container, intersection_size);
+}
+
+uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ uint64_t card = 0;
+ for (int i = 0; i < ra->size; ++i)
+ card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
return card;
}
-static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load | (UINT64_C(1) << index);
- card += (load ^ newload) >> index;
- words[offset] = newload;
- list++;
+uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+ uint64_t range_start,
+ uint64_t range_end) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ if (range_end > UINT32_MAX) {
+ range_end = UINT32_MAX + UINT64_C(1);
+ }
+ if (range_start >= range_end) {
+ return 0;
}
+ range_end--; // make range_end inclusive
+ // now we have: 0 <= range_start <= range_end <= UINT32_MAX
+
+ uint16_t minhb = range_start >> 16;
+ uint16_t maxhb = range_end >> 16;
+
+ uint64_t card = 0;
+
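+ // first handle the container that holds range_start, if any; container_rank counts how many stored values are <= the given low 16 bits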
+ int i = ra_get_index(ra, minhb);
+ if (i >= 0) {
+ if (minhb == maxhb) {
+ card += container_rank(ra->containers[i], ra->typecodes[i],
+ range_end & 0xffff);
+ } else {
+ card += container_get_cardinality(ra->containers[i],
+ ra->typecodes[i]);
+ }
+ if ((range_start & 0xffff) != 0) {
+ card -= container_rank(ra->containers[i], ra->typecodes[i],
+ (range_start & 0xffff) - 1);
+ }
+ i++;
+ } else {
+ i = -i - 1;
+ }
+
+ for (; i < ra->size; i++) {
+ uint16_t key = ra->keys[i];
+ if (key < maxhb) {
+ card += container_get_cardinality(ra->containers[i],
+ ra->typecodes[i]);
+ } else if (key == maxhb) {
+ card += container_rank(ra->containers[i], ra->typecodes[i],
+ range_end & 0xffff);
+ break;
+ } else {
+ break;
+ }
+ }
+
return card;
}
-static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load | (UINT64_C(1) << index);
- words[offset] = newload;
- list++;
+
+bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
+ return r->high_low_container.size == 0;
+}
+
+void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
+ ra_to_uint32_array(&r->high_low_container, ans);
+}
+
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+ size_t offset, size_t limit,
+ uint32_t *ans) {
+ return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
+}
+
+/** Convert array and bitmap containers to run containers when it is more
+ * efficient; also convert from run containers when that is more space
+ * efficient. Returns true if the result has at least one run container.
+ */
+bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
+ bool answer = false;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original, type_after;
+ ra_unshare_container_at_index(
+ &r->high_low_container, i); // TODO: this introduces extra cloning!
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ container_t *c1 = convert_run_optimize(c, type_original, &type_after);
+ if (type_after == RUN_CONTAINER_TYPE) {
+ answer = true;
+ }
+ ra_set_container_at_index(&r->high_low_container, i, c1, type_after);
}
+ return answer;
}
-static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
- if( croaring_avx2() ) {
- return _asm_bitset_clear_list(words, card, list, length);
+size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
+ size_t answer = 0;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ answer += container_shrink_to_fit(c, type_original);
+ }
+ answer += ra_shrink_to_fit(&r->high_low_container);
+ return answer;
+}
+
+/**
+ * Remove run-length encoding even when it is more space efficient.
+ * Returns whether a change was applied.
+ */
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
+ bool answer = false;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original, type_after;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
+ answer = true;
+ if (type_original == SHARED_CONTAINER_TYPE) {
+ run_container_t *truec = CAST_run(CAST_shared(c)->container);
+ int32_t card = run_container_cardinality(truec);
+ container_t *c1 = convert_to_bitset_or_array_container(
+ truec, card, &type_after);
+ shared_container_free(CAST_shared(c)); // frees run as needed
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+
+ } else {
+ int32_t card = run_container_cardinality(CAST_run(c));
+ container_t *c1 = convert_to_bitset_or_array_container(
+ CAST_run(c), card, &type_after);
+ run_container_free(CAST_run(c));
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+ }
+ }
+ }
+ return answer;
+}
+
+size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
+ size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
+ uint64_t cardinality = roaring_bitmap_get_cardinality(r);
+ uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
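+ // pick the smaller encoding; a one-byte header records which format follows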
+ if (portablesize < sizeasarray) {
+ buf[0] = CROARING_SERIALIZATION_CONTAINER;
+ return roaring_bitmap_portable_serialize(r, buf + 1) + 1;
} else {
- return _scalar_bitset_clear_list(words, card, list, length);
+ buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32;
+ memcpy(buf + 1, &cardinality, sizeof(uint32_t));
+ roaring_bitmap_to_uint32_array(
+ r, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
+ return 1 + (size_t)sizeasarray;
}
}
-static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
- if( croaring_avx2() ) {
- return _asm_bitset_set_list_withcard(words, card, list, length);
+size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
+ size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
+ uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) +
+ sizeof(uint32_t);
+ return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
+}
+
+size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
+ return ra_portable_size_in_bytes(&r->high_low_container);
+}
+
+
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (ans == NULL) {
+ return NULL;
+ }
+ size_t bytesread;
+ bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
+ if(is_ok) assert(bytesread <= maxbytes);
+ roaring_bitmap_set_copy_on_write(ans, false);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
+ return ans;
+}
+
+roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
+ return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
+}
+
+
+size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
+ return ra_portable_deserialize_size(buf, maxbytes);
+}
+
+
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
+ char *buf) {
+ return ra_portable_serialize(&r->high_low_container, buf);
+}
+
+roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
+ const char *bufaschar = (const char *)buf;
+ if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
+ /* This looks like a compressed set of uint32_t elements */
+ uint32_t card;
+ memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+ const uint32_t *elems =
+ (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
+ roaring_bitmap_t *bitmap = roaring_bitmap_create();
+ if (bitmap == NULL) {
+ return NULL;
+ }
+ roaring_bulk_context_t context;
+
+ memset(&context, 0, sizeof(context));
+ for (uint32_t i = 0; i < card; i++) {
+ // elems may not be aligned, read with memcpy
+ uint32_t elem;
+ memcpy(&elem, elems + i, sizeof(elem));
+ roaring_bitmap_add_bulk(bitmap, &context, elem);
+ }
+ return bitmap;
+ } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
+ return roaring_bitmap_portable_deserialize(bufaschar + 1);
+ } else
+ return (NULL);
+}
+
+bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+ void *ptr) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i)
+ if (!container_iterate(ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16,
+ iterator, ptr)) {
+ return false;
+ }
+ return true;
+}
+
+bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+ uint64_t high_bits, void *ptr) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i)
+ if (!container_iterate64(
+ ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16, iterator,
+ high_bits, ptr)) {
+ return false;
+ }
+ return true;
+}
+
+/****
+* begin roaring_uint32_iterator_t
+*****/
+
+// Partially initializes the roaring iterator when it begins looking at
+// a new container.
+static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
+ newit->in_container_index = 0;
+ newit->run_index = 0;
+ newit->current_value = 0;
+ if (newit->container_index >= newit->parent->high_low_container.size ||
+ newit->container_index < 0) {
+ newit->current_value = UINT32_MAX;
+ return (newit->has_value = false);
+ }
+ // assume not empty
+ newit->has_value = true;
+ // we precompute container, typecode and highbits so that successive
+ // iterations do not have to grab them from odd memory locations
+ // nor worry about the (easily predicted) container_unwrap_shared call.
+ newit->container =
+ newit->parent->high_low_container.containers[newit->container_index];
+ newit->typecode =
+ newit->parent->high_low_container.typecodes[newit->container_index];
+ newit->highbits =
+ ((uint32_t)
+ newit->parent->high_low_container.keys[newit->container_index])
+ << 16;
+ newit->container =
+ container_unwrap_shared(newit->container, &(newit->typecode));
+ return newit->has_value;
+}
+
+static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
+ if (!iter_new_container_partial_init(newit))
+ return newit->has_value;
+
+ switch (newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(newit->container);
+
+ uint32_t wordindex = 0;
+ uint64_t word;
+ while ((word = bc->words[wordindex]) == 0) {
+ wordindex++; // advance
+ }
+ // here "word" is non-zero
+ newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(newit->container);
+ newit->current_value = newit->highbits | ac->array[0];
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(newit->container);
+ newit->current_value = newit->highbits | rc->runs[0].value;
+ break; }
+
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ } // switch (typecode)
+ return true;
+}
+
+static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
+ if (!iter_new_container_partial_init(newit))
+ return newit->has_value;
+
+ switch(newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
+ uint64_t word;
+ const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
+ while ((word = bitset_container->words[wordindex]) == 0)
+ --wordindex;
+
+ int num_leading_zeros = __builtin_clzll(word);
+ newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t* array_container = (const array_container_t*)newit->container;
+ newit->in_container_index = array_container->cardinality - 1;
+ newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t* run_container = (const run_container_t*)newit->container;
+ newit->run_index = run_container->n_runs - 1;
+ const rle16_t* last_run = &run_container->runs[newit->run_index];
+ newit->current_value = newit->highbits | (last_run->value + last_run->length);
+ break;
+ }
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ }
+ return true;
+}
+
+// prerequisite: the value should be in range of the container
+static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
+ // Don't have to check return value because of prerequisite
+ iter_new_container_partial_init(newit);
+ uint16_t lb = val & 0xFFFF;
+
+ switch (newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(newit->container);
+ newit->in_container_index =
+ bitset_container_index_equalorlarger(bc, lb);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(newit->container);
+ newit->in_container_index =
+ array_container_index_equalorlarger(ac, lb);
+ newit->current_value =
+ newit->highbits | ac->array[newit->in_container_index];
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(newit->container);
+ newit->run_index = run_container_index_equalorlarger(rc, lb);
+ if (rc->runs[newit->run_index].value <= lb) {
+ newit->current_value = val;
+ } else {
+ newit->current_value =
+ newit->highbits | rc->runs[newit->run_index].value;
+ }
+ break; }
+
+ default:
+ __builtin_unreachable();
+ }
+
+ return true;
+}
+
+void roaring_init_iterator(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit) {
+ newit->parent = r;
+ newit->container_index = 0;
+ newit->has_value = loadfirstvalue(newit);
+}
+
+void roaring_init_iterator_last(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit) {
+ newit->parent = r;
+ newit->container_index = newit->parent->high_low_container.size - 1;
+ newit->has_value = loadlastvalue(newit);
+}
+
+roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) {
+ roaring_uint32_iterator_t *newit =
+ (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ if (newit == NULL) return NULL;
+ roaring_init_iterator(r, newit);
+ return newit;
+}
+
+roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+ const roaring_uint32_iterator_t *it) {
+ roaring_uint32_iterator_t *newit =
+ (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
+ return newit;
+}
+
+bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
+ uint16_t hb = val >> 16;
+ const int i = ra_get_index(& it->parent->high_low_container, hb);
+ if (i >= 0) {
+ uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
+ uint16_t lb = val & 0xFFFF;
+ if(lowvalue < lb ) {
+ it->container_index = i+1; // will have to load first value of next container
+ } else {// the value is necessarily within the range of the container
+ it->container_index = i;
+ it->has_value = loadfirstvalue_largeorequal(it, val);
+ return it->has_value;
+ }
} else {
- return _scalar_bitset_set_list_withcard(words, card, list, length);
+ // there is no matching container, so we move on to the next one
+ it->container_index = -i-1;
}
+ it->has_value = loadfirstvalue(it);
+ return it->has_value;
}
-static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- if( croaring_avx2() ) {
- _asm_bitset_set_list(words, list, length);
+
+bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
+ if (it->container_index >= it->parent->high_low_container.size) {
+ return (it->has_value = false);
+ }
+ if (it->container_index < 0) {
+ it->container_index = 0;
+ return (it->has_value = loadfirstvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(it->container);
+ it->in_container_index++;
+
+ uint32_t wordindex = it->in_container_index / 64;
+ if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
+
+ uint64_t word = bc->words[wordindex] &
+ (UINT64_MAX << (it->in_container_index % 64));
+ // next part could be optimized/simplified
+ while ((word == 0) &&
+ (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ if (word != 0) {
+ it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(it->container);
+ it->in_container_index++;
+ if (it->in_container_index < ac->cardinality) {
+ it->current_value =
+ it->highbits | ac->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == UINT32_MAX) { // avoid overflow to zero
+ return (it->has_value = false);
+ }
+
+ const run_container_t* rc = const_CAST_run(it->container);
+ uint32_t limit = (it->highbits | (rc->runs[it->run_index].value +
+ rc->runs[it->run_index].length));
+ if (++it->current_value <= limit) {
+ return (it->has_value = true);
+ }
+
+ if (++it->run_index < rc->n_runs) { // Assume the run has a value
+ it->current_value =
+ it->highbits | rc->runs[it->run_index].value;
+ return (it->has_value = true);
+ }
+ break;
+ }
+
+ default:
+ __builtin_unreachable();
+ }
+
+ // moving to next container
+ it->container_index++;
+ return (it->has_value = loadfirstvalue(it));
+}
+
+bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
+ if (it->container_index < 0) {
+ return (it->has_value = false);
+ }
+ if (it->container_index >= it->parent->high_low_container.size) {
+ it->container_index = it->parent->high_low_container.size - 1;
+ return (it->has_value = loadlastvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
+ int32_t wordindex = it->in_container_index / 64;
+ uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
+
+ while (word == 0 && --wordindex >= 0) {
+ word = bitset_container->words[wordindex];
+ }
+ if (word == 0)
+ break;
+
+ int num_leading_zeros = __builtin_clzll(word);
+ it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const array_container_t* array_container = (const array_container_t*)it->container;
+ it->current_value = it->highbits | array_container->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == 0)
+ return (it->has_value = false);
+
+ const run_container_t* run_container = (const run_container_t*)it->container;
+ if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
+ return (it->has_value = true);
+ }
+
+ if (--it->run_index < 0)
+ break;
+
+ it->current_value = it->highbits | (run_container->runs[it->run_index].value +
+ run_container->runs[it->run_index].length);
+ return (it->has_value = true);
+ }
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ } // switch (typecode)
+
+ // moving to previous container
+ it->container_index--;
+ return (it->has_value = loadlastvalue(it));
+}
+
+uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
+ uint32_t ret = 0;
+ uint32_t num_values;
+ uint32_t wordindex; // used for bitsets
+ uint64_t word; // used for bitsets
+ const array_container_t* acont; //TODO remove
+ const run_container_t* rcont; //TODO remove
+ const bitset_container_t* bcont; //TODO remove
+
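+ // drain values container by container until 'count' values are written or the iterator is exhausted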
+ while (it->has_value && ret < count) {
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE:
+ bcont = const_CAST_bitset(it->container);
+ wordindex = it->in_container_index / 64;
+ word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
+ do {
+ while (word != 0 && ret < count) {
+ buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
+ word = word & (word - 1);
+ buf++;
+ ret++;
+ }
+ while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ wordindex++;
+ word = bcont->words[wordindex];
+ }
+ } while (word != 0 && ret < count);
+ it->has_value = (word != 0);
+ if (it->has_value) {
+ it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+ it->current_value = it->highbits | it->in_container_index;
+ }
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ acont = const_CAST_array(it->container);
+ num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
+ for (uint32_t i = 0; i < num_values; i++) {
+ buf[i] = it->highbits | acont->array[it->in_container_index + i];
+ }
+ buf += num_values;
+ ret += num_values;
+ it->in_container_index += num_values;
+ it->has_value = (it->in_container_index < acont->cardinality);
+ if (it->has_value) {
+ it->current_value = it->highbits | acont->array[it->in_container_index];
+ }
+ break;
+ case RUN_CONTAINER_TYPE:
+ rcont = const_CAST_run(it->container);
+ //"in_run_index" name is misleading, read it as "max_value_in_current_run"
+ do {
+ uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
+ num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
+ for (uint32_t i = 0; i < num_values; i++) {
+ buf[i] = it->current_value + i;
+ }
+ it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
+ buf += num_values;
+ ret += num_values;
+
+ if (it->current_value > largest_run_value || it->current_value == 0) {
+ it->run_index++;
+ if (it->run_index < rcont->n_runs) {
+ it->current_value = it->highbits | rcont->runs[it->run_index].value;
+ } else {
+ it->has_value = false;
+ }
+ }
+ } while ((ret < count) && it->has_value);
+ break;
+ default:
+ assert(false);
+ }
+ if (it->has_value) {
+ assert(ret == count);
+ return ret;
+ }
+ it->container_index++;
+ it->has_value = loadfirstvalue(it);
+ }
+ return ret;
+}
+
+
+
+void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); }
+
+/****
+* end of roaring_uint32_iterator_t
+*****/
+
+bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ const roaring_array_t *ra1 = &r1->high_low_container;
+ const roaring_array_t *ra2 = &r2->high_low_container;
+
+ if (ra1->size != ra2->size) {
+ return false;
+ }
+ for (int i = 0; i < ra1->size; ++i) {
+ if (ra1->keys[i] != ra2->keys[i]) {
+ return false;
+ }
+ }
+ for (int i = 0; i < ra1->size; ++i) {
+ bool areequal = container_equals(ra1->containers[i],
+ ra1->typecodes[i],
+ ra2->containers[i],
+ ra2->typecodes[i]);
+ if (!areequal) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ const roaring_array_t *ra1 = &r1->high_low_container;
+ const roaring_array_t *ra2 = &r2->high_low_container;
+
+ const int length1 = ra1->size,
+ length2 = ra2->size;
+
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(ra1, pos1);
+ const uint16_t s2 = ra_get_key_at_index(ra2, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2);
+ if (!container_is_subset(c1, type1, c2, type2))
+ return false;
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ return false;
+ } else { // s1 > s2
+ pos2 = ra_advance_until(ra2, s1, pos2);
+ }
+ }
+ if (pos1 == length1)
+ return true;
+ else
+ return false;
+}
+
+static void insert_flipped_container(roaring_array_t *ans_arr,
+ const roaring_array_t *x1_arr, uint16_t hb,
+ uint16_t lb_start, uint16_t lb_end) {
+ const int i = ra_get_index(x1_arr, hb);
+ const int j = ra_get_index(ans_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
+ (uint32_t)(lb_end + 1), &ctype_out);
+
+ if (container_get_cardinality(flipped_container, ctype_out))
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ else {
+ container_free(flipped_container, ctype_out);
+ }
} else {
- _scalar_bitset_set_list(words, list, length);
+ flipped_container = container_range_of_ones(
+ (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
}
}
-#else
-static uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *(const uint16_t *)list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load & ~(UINT64_C(1) << index);
- card -= (load ^ newload) >> index;
- words[offset] = newload;
- list++;
+
+static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
+ uint16_t lb_start, uint16_t lb_end) {
+ const int i = ra_get_index(x1_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container = container_inot_range(
+ container_to_flip, ctype_in, (uint32_t)lb_start,
+ (uint32_t)(lb_end + 1), &ctype_out);
+ // if a new container was created, the old one was already freed
+ if (container_get_cardinality(flipped_container, ctype_out)) {
+ ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
+ } else {
+ container_free(flipped_container, ctype_out);
+ ra_remove_at_index(x1_arr, i);
+ }
+
+ } else {
+ flipped_container = container_range_of_ones(
+ (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
+ ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
+ ctype_out);
}
- return card;
}
-static uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load | (UINT64_C(1) << index);
- card += (load ^ newload) >> index;
- words[offset] = newload;
- list++;
+static void insert_fully_flipped_container(roaring_array_t *ans_arr,
+ const roaring_array_t *x1_arr,
+ uint16_t hb) {
+ const int i = ra_get_index(x1_arr, hb);
+ const int j = ra_get_index(ans_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_not(container_to_flip, ctype_in, &ctype_out);
+ if (container_get_cardinality(flipped_container, ctype_out))
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ else {
+ container_free(flipped_container, ctype_out);
+ }
+ } else {
+ flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
}
- return card;
}
-static void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load | (UINT64_C(1) << index);
- words[offset] = newload;
- list++;
+static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
+ const int i = ra_get_index(x1_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_inot(container_to_flip, ctype_in, &ctype_out);
+
+ if (container_get_cardinality(flipped_container, ctype_out)) {
+ ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
+ } else {
+ container_free(flipped_container, ctype_out);
+ ra_remove_at_index(x1_arr, i);
+ }
+
+ } else {
+ flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
+ ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
+ ctype_out);
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
+ uint64_t range_start,
+ uint64_t range_end) {
+ if (range_start >= range_end) {
+ return roaring_bitmap_copy(x1);
+ }
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+
+ roaring_bitmap_t *ans = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(ans, is_cow(x1));
+
+ uint16_t hb_start = (uint16_t)(range_start >> 16);
+ const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
+ uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+ const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
+
+ ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
+ hb_start, is_cow(x1));
+ if (hb_start == hb_end) {
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_start, lb_start,
+ lb_end);
+ } else {
+ // start and end containers are distinct
+ if (lb_start > 0) {
+ // handle first (partial) container
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_start,
+ lb_start, 0xFFFF);
+ ++hb_start; // for the full containers. Can't wrap.
+ }
+
+ if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
+
+ for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
+ insert_fully_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb);
+ }
+
+ // handle a partial final container
+ if (lb_end != 0xFFFF) {
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_end + 1, 0,
+ lb_end);
+ ++hb_end;
+ }
+ }
+ ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
+ hb_end, is_cow(x1));
+ return ans;
+}
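+
+/*
+ * Illustrative sketch (editor's note, not part of the upstream amalgamation):
+ * flipping the half-open range [0, 1000) of an empty bitmap yields exactly the
+ * values 0..999.
+ *
+ *   roaring_bitmap_t *empty = roaring_bitmap_create();
+ *   roaring_bitmap_t *flipped = roaring_bitmap_flip(empty, 0, 1000);
+ *   assert(roaring_bitmap_get_cardinality(flipped) == 1000);
+ *   roaring_bitmap_free(flipped);
+ *   roaring_bitmap_free(empty);
+ */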
+
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
+ uint64_t range_end) {
+ if (range_start >= range_end) {
+ return; // empty range
+ }
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+
+ uint16_t hb_start = (uint16_t)(range_start >> 16);
+ const uint16_t lb_start = (uint16_t)range_start;
+ uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+ const uint16_t lb_end = (uint16_t)(range_end - 1);
+
+ if (hb_start == hb_end) {
+ inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+ lb_end);
+ } else {
+ // start and end containers are distinct
+ if (lb_start > 0) {
+ // handle first (partial) container
+ inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+ 0xFFFF);
+ ++hb_start; // for the full containers. Can't wrap.
+ }
+
+ if (lb_end != 0xFFFF) --hb_end;
+
+ for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
+ inplace_fully_flip_container(&x1->high_low_container, hb);
+ }
+ // handle a partial final container
+ if (lb_end != 0xFFFF) {
+ inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
+ lb_end);
+ ++hb_end;
+ }
+ }
+}
+
+static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) {
+ int size = ra_get_size(ra);
+ if (size == 0 || ra_get_key_at_index(ra, size-1) != k) {
+ // No merge.
+ ra_append(ra, k, c, t);
+ return;
+ }
+
+ uint8_t last_t, new_t;
+ container_t *last_c, *new_c;
+
+    // NOTE: we don't need to unwrap here; since we added last_c ourselves,
+    // we have the certainty it's not a shared container.
+ // The same applies to c, as it's the result of calling container_offset.
+ last_c = ra_get_container_at_index(ra, size-1, &last_t);
+ new_c = container_ior(last_c, last_t, c, t, &new_t);
+
+ ra_set_container_at_index(ra, size-1, new_c, new_t);
+
+ // Comparison of pointers of different origin is UB (or so claim some compiler
+ // makers), so we compare their bit representation only.
+ if ((uintptr_t)last_c != (uintptr_t)new_c) {
+ container_free(last_c, last_t);
+ }
+ container_free(c, t);
+}
+
+// roaring_bitmap_add_offset adds the value 'offset' to each and every value in
+// a bitmap, generating a new bitmap in the process. If offset + element is
+// outside of the range [0,2^32), the element will be dropped.
+// We need "offset" to be 64 bits because we want to support values
+// between -0xFFFFFFFF and +0xFFFFFFFF.
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+ int64_t offset) {
+ roaring_bitmap_t *answer;
+ roaring_array_t *ans_ra;
+ int64_t container_offset;
+ uint16_t in_offset;
+
+ const roaring_array_t *bm_ra = &bm->high_low_container;
+ int length = bm_ra->size;
+
+ if (offset == 0) {
+ return roaring_bitmap_copy(bm);
+ }
+
+ container_offset = offset >> 16;
+ in_offset = (uint16_t)(offset - container_offset * (1 << 16));
+
+ answer = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(answer, is_cow(bm));
+
+ ans_ra = &answer->high_low_container;
+
+ if (in_offset == 0) {
+ ans_ra = &answer->high_low_container;
+
+ for (int i = 0, j = 0; i < length; ++i) {
+ int64_t key = ra_get_key_at_index(bm_ra, i);
+ key += container_offset;
+
+ if (key < 0 || key >= (1 << 16)) {
+ continue;
+ }
+
+ ra_append_copy(ans_ra, bm_ra, i, false);
+ ans_ra->keys[j++] = key;
+ }
+
+ return answer;
+ }
+
+ uint8_t t;
+ const container_t *c;
+ container_t *lo, *hi, **lo_ptr, **hi_ptr;
+ int64_t k;
+
+ for (int i = 0; i < length; ++i) {
+ lo = hi = NULL;
+ lo_ptr = hi_ptr = NULL;
+
+ k = ra_get_key_at_index(bm_ra, i)+container_offset;
+ if (k >= 0 && k < (1 << 16)) {
+ lo_ptr = &lo;
+ }
+ if (k+1 >= 0 && k+1 < (1 << 16)) {
+ hi_ptr = &hi;
+ }
+ if (lo_ptr == NULL && hi_ptr == NULL) {
+ continue;
+ }
+
+ c = ra_get_container_at_index(bm_ra, i, &t);
+ c = container_unwrap_shared(c, &t);
+
+ container_add_offset(c, t, lo_ptr, hi_ptr, in_offset);
+ if (lo != NULL) {
+ offset_append_with_merge(ans_ra, k, lo, t);
+ }
+ if (hi != NULL) {
+ ra_append(ans_ra, k+1, hi, t);
+ }
}
+
+ return answer;
}
+roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c;
+ if (bitsetconversion &&
+ (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
+ (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)
+ ){
+ container_t *newc1 =
+ container_mutable_unwrap_shared(c1, &type1);
+ newc1 = container_to_bitset(newc1, type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ c = container_lazy_ior(newc1, type1, c2, type2,
+ &result_type);
+ if (c != newc1) { // should not happen
+ container_free(newc1, type1);
+ }
+ } else {
+ c = container_lazy_or(c1, type1, c2, type2, &result_type);
+ }
+            // since we assume that the initial containers are non-empty,
+            // the result here can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ if (!container_is_full(c1, type1)) {
+ if ((bitsetconversion == false) ||
+ (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)
+ ){
+ c1 = get_writable_copy_if_shared(c1, &type1);
+ } else {
+ // convert to bitset
+ container_t *old_c1 = c1;
+ uint8_t old_type1 = type1;
+ c1 = container_mutable_unwrap_shared(c1, &type1);
+ c1 = container_to_bitset(c1, type1);
+ container_free(old_c1, old_type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ }
+
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_ior(c1, type1, c2, type2,
+ &result_type);
+
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_xor(
+ c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_lazy_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
+ roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i) {
+ const uint8_t old_type = ra->typecodes[i];
+ container_t *old_c = ra->containers[i];
+ uint8_t new_type = old_type;
+ container_t *new_c = container_repair_after_lazy(old_c, &new_type);
+ ra->containers[i] = new_c;
+ ra->typecodes[i] = new_type;
+ }
+}
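+
+/*
+ * Illustrative sketch (editor's note, not part of the upstream amalgamation):
+ * lazy operations may leave bitset containers with an unknown cardinality, so
+ * the result must be repaired before it is queried. `a`, `b` and `c` are
+ * assumed, pre-built bitmaps.
+ *
+ *   roaring_bitmap_t *acc = roaring_bitmap_lazy_or(a, b, true);
+ *   roaring_bitmap_lazy_or_inplace(acc, c, true);
+ *   roaring_bitmap_repair_after_lazy(acc);
+ *   uint64_t n = roaring_bitmap_get_cardinality(acc);
+ *   roaring_bitmap_free(acc);
+ */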
+
+
+
+/**
+* roaring_bitmap_rank returns the number of integers that are smaller than or
+* equal to x.
+*/
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
+ uint64_t size = 0;
+ uint32_t xhigh = x >> 16;
+ for (int i = 0; i < bm->high_low_container.size; i++) {
+ uint32_t key = bm->high_low_container.keys[i];
+ if (xhigh > key) {
+ size +=
+ container_get_cardinality(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i]);
+ } else if (xhigh == key) {
+ return size + container_rank(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i],
+ x & 0xFFFF);
+ } else {
+ return size;
+ }
+ }
+ return size;
+}
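+
+/*
+ * Worked example (editor's note, not part of the upstream amalgamation): for a
+ * bitmap `bm` holding {1, 100, 1000}, the rank counts values <= x:
+ *
+ *   roaring_bitmap_rank(bm, 0)    == 0
+ *   roaring_bitmap_rank(bm, 100)  == 2   // 1 and 100
+ *   roaring_bitmap_rank(bm, 5000) == 3
+ */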
+
+/**
+* roaring_bitmap_minimum returns the smallest value in the set.
+* Returns UINT32_MAX if the set is empty.
+*/
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *c = bm->high_low_container.containers[0];
+ uint8_t type = bm->high_low_container.typecodes[0];
+ uint32_t key = bm->high_low_container.keys[0];
+ uint32_t lowvalue = container_minimum(c, type);
+ return lowvalue | (key << 16);
+ }
+ return UINT32_MAX;
+}
+
+/**
+* roaring_bitmap_maximum returns the greatest value in the set.
+* Returns 0 if the set is empty.
+*/
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *container =
+ bm->high_low_container.containers[bm->high_low_container.size - 1];
+ uint8_t typecode =
+ bm->high_low_container.typecodes[bm->high_low_container.size - 1];
+ uint32_t key =
+ bm->high_low_container.keys[bm->high_low_container.size - 1];
+ uint32_t lowvalue = container_maximum(container, typecode);
+ return lowvalue | (key << 16);
+ }
+ return 0;
+}
+
+bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
+ uint32_t *element) {
+ container_t *container;
+ uint8_t typecode;
+ uint16_t key;
+ uint32_t start_rank = 0;
+ int i = 0;
+ bool valid = false;
+ while (!valid && i < bm->high_low_container.size) {
+ container = bm->high_low_container.containers[i];
+ typecode = bm->high_low_container.typecodes[i];
+ valid =
+ container_select(container, typecode, &start_rank, rank, element);
+ i++;
+ }
+
+ if (valid) {
+ key = bm->high_low_container.keys[i - 1];
+ *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed
+ return true;
+ } else
+ return false;
+}
+
+bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint64_t answer = 0;
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ if (container_intersect(c1, type1, c2, type2))
+ return true;
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer != 0;
+}
+
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+ uint64_t x, uint64_t y) {
+ if (x >= y) {
+ // Empty range.
+ return false;
+ }
+ roaring_uint32_iterator_t it;
+ roaring_init_iterator(bm, &it);
+ if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) {
+ // No values above x.
+ return false;
+ }
+ if (it.current_value >= y) {
+ // No values below y.
+ return false;
+ }
+ return true;
+}
+
+
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint64_t answer = 0;
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ answer += container_and_cardinality(c1, type1, c2, type2);
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer;
+}
+
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return (double)inter / (double)(c1 + c2 - inter);
+}
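+
+/*
+ * Worked example (editor's note, not part of the upstream amalgamation): with
+ * |x1| = 4, |x2| = 6 and an intersection of 2, the index is
+ * 2 / (4 + 6 - 2) = 0.25, i.e. |intersection| / |union|. When both bitmaps are
+ * empty the division is 0.0 / 0.0 and the result is NaN.
+ */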
+
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - inter;
+}
+
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 - inter;
+}
+
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - 2 * inter;
+}
+
+
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ /*
+ * the next function call involves a binary search and lots of branching.
+ */
+ int32_t i = ra_get_index(&r->high_low_container, hb);
+ if (i < 0) return false;
+
+ uint8_t typecode;
+ // next call ought to be cheap
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ // rest might be a tad expensive, possibly involving another round of binary search
+ return container_contains(container, val & 0xFFFF, typecode);
+}
+
+
+/**
+ * Check whether a range of values from range_start (included) to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+    if (range_start >= range_end) return true; // an empty range is always contained!
+ if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
+ uint16_t hb_rs = (uint16_t)(range_start >> 16);
+ uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
+ const int32_t span = hb_re - hb_rs;
+ const int32_t hlc_sz = ra_get_size(&r->high_low_container);
+ if (hlc_sz < span + 1) {
+ return false;
+ }
+ int32_t is = ra_get_index(&r->high_low_container, hb_rs);
+ int32_t ie = ra_get_index(&r->high_low_container, hb_re);
+ ie = (ie < 0 ? -ie - 1 : ie);
+ if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+ return false;
+ }
+ const uint32_t lb_rs = range_start & 0xFFFF;
+ const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
+ uint8_t type;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, is,
+ &type);
+ if (hb_rs == hb_re) {
+ return container_contains_range(c, lb_rs, lb_re, type);
+ }
+ if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
+ return false;
+ }
+ c = ra_get_container_at_index(&r->high_low_container, ie, &type);
+ if (!container_contains_range(c, 0, lb_re, type)) {
+ return false;
+ }
+ for (int32_t i = is + 1; i < ie; ++i) {
+ c = ra_get_container_at_index(&r->high_low_container, i, &type);
+ if (!container_is_full(c, type) ) {
+ return false;
+ }
+ }
+ return true;
+}
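+
+/*
+ * Illustrative sketch (editor's note, not part of the upstream amalgamation):
+ * the range is half-open, so [10, 20) checks the values 10..19 only.
+ *
+ *   roaring_bitmap_t *r = roaring_bitmap_create();
+ *   roaring_bitmap_add_range(r, 10, 20);                // adds 10..19
+ *   assert(roaring_bitmap_contains_range(r, 10, 20));   // true
+ *   assert(!roaring_bitmap_contains_range(r, 10, 21));  // 20 is absent
+ *   roaring_bitmap_free(r);
+ */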
+
+
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ return (roaring_bitmap_get_cardinality(r2) >
+ roaring_bitmap_get_cardinality(r1) &&
+ roaring_bitmap_is_subset(r1, r2));
+}
+
+
+/*
+ * FROZEN SERIALIZATION FORMAT DESCRIPTION
+ *
+ * -- (beginning must be aligned by 32 bytes) --
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
+ * <run_data> rle16_t[total number of rle elements in all run containers]
+ * <array_data> uint16_t[total number of array elements in all array containers]
+ * <keys> uint16_t[num_containers]
+ * <counts> uint16_t[num_containers]
+ * <typecodes> uint8_t[num_containers]
+ * <header> uint32_t
+ *
+ * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
+ * and the number of containers (17 bits).
+ *
+ * <counts> stores number of elements for every container.
+ * Its meaning depends on container type.
+ * For array and bitset containers, this value is the container cardinality minus one.
+ * For run container, it is the number of rle_t elements (n_runs).
+ *
+ * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
+ * all containers of respective type.
+ *
+ * <*_data> and <keys> are kept close together because they are not accessed
+ * during deserialization. This may reduce IO in case of large mmapped bitmaps.
+ * All members have their native alignments during deserialization except <header>,
+ * which is not guaranteed to be aligned by 4 bytes.
+ */
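+
+/*
+ * Illustrative round-trip sketch (editor's note, not part of the upstream
+ * amalgamation): the frozen view requires a 32-byte aligned buffer, hence
+ * aligned_alloc(); `rb` is an assumed, pre-built bitmap.
+ *
+ *   size_t nbytes = roaring_bitmap_frozen_size_in_bytes(rb);
+ *   char *buf = (char *)aligned_alloc(32, (nbytes + 31) / 32 * 32);
+ *   roaring_bitmap_frozen_serialize(rb, buf);
+ *   const roaring_bitmap_t *view = roaring_bitmap_frozen_view(buf, nbytes);
+ *   // `view` aliases `buf`: keep `buf` alive for as long as `view` is used.
+ */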
+
+size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
+ const roaring_array_t *ra = &rb->high_low_container;
+ size_t num_bytes = 0;
+ for (int32_t i = 0; i < ra->size; i++) {
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ num_bytes += rc->n_runs * sizeof(rle16_t);
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ num_bytes += ac->cardinality * sizeof(uint16_t);
+ break;
+ }
+ default:
+ __builtin_unreachable();
+ }
+ }
+ num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
+ num_bytes += 4; // header
+ return num_bytes;
+}
+
+inline static void *arena_alloc(char **arena, size_t num_bytes) {
+ char *res = *arena;
+ *arena += num_bytes;
+ return res;
+}
+
+void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
+ /*
+     * Note: we do not require the user to supply a specifically aligned buffer.
+ * Thus we have to use memcpy() everywhere.
+ */
+
+ const roaring_array_t *ra = &rb->high_low_container;
+
+ size_t bitset_zone_size = 0;
+ size_t run_zone_size = 0;
+ size_t array_zone_size = 0;
+ for (int32_t i = 0; i < ra->size; i++) {
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_zone_size +=
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ run_zone_size += rc->n_runs * sizeof(rle16_t);
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ array_zone_size += ac->cardinality * sizeof(uint16_t);
+ break;
+ }
+ default:
+ __builtin_unreachable();
+ }
+ }
+
+ uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
+ rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
+ uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
+ uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
+ uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
+
+ for (int32_t i = 0; i < ra->size; i++) {
+ uint16_t count;
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc =
+ const_CAST_bitset(ra->containers[i]);
+ memcpy(bitset_zone, bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+ bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+ if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+ count = bc->cardinality - 1;
+ } else {
+ count = bitset_container_compute_cardinality(bc) - 1;
+ }
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ size_t num_bytes = rc->n_runs * sizeof(rle16_t);
+ memcpy(run_zone, rc->runs, num_bytes);
+ run_zone += rc->n_runs;
+ count = rc->n_runs;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ size_t num_bytes = ac->cardinality * sizeof(uint16_t);
+ memcpy(array_zone, ac->array, num_bytes);
+ array_zone += ac->cardinality;
+ count = ac->cardinality - 1;
+ break;
+ }
+ default:
+ __builtin_unreachable();
+ }
+ memcpy(&count_zone[i], &count, 2);
+ }
+ memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
+ memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
+ uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
+ memcpy(header_zone, &header, 4);
+}
+
+const roaring_bitmap_t *
+roaring_bitmap_frozen_view(const char *buf, size_t length) {
+ if ((uintptr_t)buf % 32 != 0) {
+ return NULL;
+ }
+
+ // cookie and num_containers
+ if (length < 4) {
+ return NULL;
+ }
+ uint32_t header;
+ memcpy(&header, buf + length - 4, 4); // header may be misaligned
+ if ((header & 0x7FFF) != FROZEN_COOKIE) {
+ return NULL;
+ }
+ int32_t num_containers = (header >> 15);
+
+ // typecodes, counts and keys
+ if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
+ return NULL;
+ }
+ uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
+ uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
+ uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
+
+ // {bitset,array,run}_zone
+ int32_t num_bitset_containers = 0;
+ int32_t num_run_containers = 0;
+ int32_t num_array_containers = 0;
+ size_t bitset_zone_size = 0;
+ size_t run_zone_size = 0;
+ size_t array_zone_size = 0;
+ for (int32_t i = 0; i < num_containers; i++) {
+ switch (typecodes[i]) {
+ case BITSET_CONTAINER_TYPE:
+ num_bitset_containers++;
+ bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ case RUN_CONTAINER_TYPE:
+ num_run_containers++;
+ run_zone_size += counts[i] * sizeof(rle16_t);
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ num_array_containers++;
+ array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
+ break;
+ default:
+ return NULL;
+ }
+ }
+ if (length != bitset_zone_size + run_zone_size + array_zone_size +
+ 5 * num_containers + 4) {
+ return NULL;
+ }
+ uint64_t *bitset_zone = (uint64_t*) (buf);
+ rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
+ uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
+
+ size_t alloc_size = 0;
+ alloc_size += sizeof(roaring_bitmap_t);
+ alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+ alloc_size += num_run_containers * sizeof(run_container_t);
+ alloc_size += num_array_containers * sizeof(array_container_t);
+
+ char *arena = (char *)roaring_malloc(alloc_size);
+ if (arena == NULL) {
+ return NULL;
+ }
+
+ roaring_bitmap_t *rb = (roaring_bitmap_t *)
+ arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+ rb->high_low_container.allocation_size = num_containers;
+ rb->high_low_container.size = num_containers;
+ rb->high_low_container.keys = (uint16_t *)keys;
+ rb->high_low_container.typecodes = (uint8_t *)typecodes;
+ rb->high_low_container.containers =
+ (container_t **)arena_alloc(&arena,
+ sizeof(container_t*) * num_containers);
+    // Ensure the offset of high_low_container.containers is the known distance
+    // used in the C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the
+ // only allocation that precedes high_low_container.containers. If this is
+ // changed (new allocation or changed order), this offset will also need to
+ // be changed in the C++ wrapper.
+ assert(rb ==
+ (roaring_bitmap_t *)((char *)rb->high_low_container.containers -
+ sizeof(roaring_bitmap_t)));
+ for (int32_t i = 0; i < num_containers; i++) {
+ switch (typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_container_t *bitset = (bitset_container_t *)
+ arena_alloc(&arena, sizeof(bitset_container_t));
+ bitset->words = bitset_zone;
+ bitset->cardinality = counts[i] + UINT32_C(1);
+ rb->high_low_container.containers[i] = bitset;
+ bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ run_container_t *run = (run_container_t *)
+ arena_alloc(&arena, sizeof(run_container_t));
+ run->capacity = counts[i];
+ run->n_runs = counts[i];
+ run->runs = run_zone;
+ rb->high_low_container.containers[i] = run;
+ run_zone += run->n_runs;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ array_container_t *array = (array_container_t *)
+ arena_alloc(&arena, sizeof(array_container_t));
+ array->capacity = counts[i] + UINT32_C(1);
+ array->cardinality = counts[i] + UINT32_C(1);
+ array->array = array_zone;
+ rb->high_low_container.containers[i] = array;
+ array_zone += counts[i] + UINT32_C(1);
+ break;
+ }
+ default:
+ roaring_free(arena);
+ return NULL;
+ }
+ }
+
+ return rb;
+}
+
+ALLOW_UNALIGNED
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
+ char *start_of_buf = (char *) buf;
+ uint32_t cookie;
+ int32_t num_containers;
+ uint16_t *descriptive_headers;
+ uint32_t *offset_headers = NULL;
+ const char *run_flag_bitset = NULL;
+ bool hasrun = false;
+
+ // deserialize cookie
+ memcpy(&cookie, buf, sizeof(uint32_t));
+ buf += sizeof(uint32_t);
+ if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
+ memcpy(&num_containers, buf, sizeof(int32_t));
+ buf += sizeof(int32_t);
+ descriptive_headers = (uint16_t *) buf;
+ buf += num_containers * 2 * sizeof(uint16_t);
+ offset_headers = (uint32_t *) buf;
+ buf += num_containers * sizeof(uint32_t);
+ } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
+ num_containers = (cookie >> 16) + 1;
+ hasrun = true;
+ int32_t run_flag_bitset_size = (num_containers + 7) / 8;
+ run_flag_bitset = buf;
+ buf += run_flag_bitset_size;
+ descriptive_headers = (uint16_t *) buf;
+ buf += num_containers * 2 * sizeof(uint16_t);
+ if(num_containers >= NO_OFFSET_THRESHOLD) {
+ offset_headers = (uint32_t *) buf;
+ buf += num_containers * sizeof(uint32_t);
+ }
+ } else {
+ return NULL;
+ }
+
+ // calculate total size for allocation
+ int32_t num_bitset_containers = 0;
+ int32_t num_run_containers = 0;
+ int32_t num_array_containers = 0;
+
+ for (int32_t i = 0; i < num_containers; i++) {
+ uint16_t tmp;
+ memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ uint32_t cardinality = tmp + 1;
+ bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+
+ if (isbitmap) {
+ num_bitset_containers++;
+ } else if (isrun) {
+ num_run_containers++;
+ } else {
+ num_array_containers++;
+ }
+ }
+
+ size_t alloc_size = 0;
+ alloc_size += sizeof(roaring_bitmap_t);
+ alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+ alloc_size += num_run_containers * sizeof(run_container_t);
+ alloc_size += num_array_containers * sizeof(array_container_t);
+ alloc_size += num_containers * sizeof(uint16_t); // keys
+ alloc_size += num_containers * sizeof(uint8_t); // typecodes
+
+ // allocate bitmap and construct containers
+ char *arena = (char *)roaring_malloc(alloc_size);
+ if (arena == NULL) {
+ return NULL;
+ }
+
+ roaring_bitmap_t *rb = (roaring_bitmap_t *)
+ arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+ rb->high_low_container.allocation_size = num_containers;
+ rb->high_low_container.size = num_containers;
+ rb->high_low_container.containers =
+ (container_t **)arena_alloc(&arena,
+ sizeof(container_t*) * num_containers);
+
+ uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
+ uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
+
+ rb->high_low_container.keys = keys;
+ rb->high_low_container.typecodes = typecodes;
+
+ for (int32_t i = 0; i < num_containers; i++) {
+ uint16_t tmp;
+ memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ int32_t cardinality = tmp + 1;
+ bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+
+ keys[i] = descriptive_headers[2*i];
+
+ if (isbitmap) {
+ typecodes[i] = BITSET_CONTAINER_TYPE;
+ bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t));
+ c->cardinality = cardinality;
+ if(offset_headers != NULL) {
+ c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
+ } else {
+ c->words = (uint64_t *) buf;
+ buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ } else if (isrun) {
+ typecodes[i] = RUN_CONTAINER_TYPE;
+ run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
+ c->capacity = cardinality;
+ uint16_t n_runs;
+ if(offset_headers != NULL) {
+ memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t));
+ c->n_runs = n_runs;
+ c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t));
+ } else {
+ memcpy(&n_runs, buf, sizeof(uint16_t));
+ c->n_runs = n_runs;
+ buf += sizeof(uint16_t);
+ c->runs = (rle16_t *) buf;
+ buf += c->n_runs * sizeof(rle16_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ } else {
+ typecodes[i] = ARRAY_CONTAINER_TYPE;
+ array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t));
+ c->cardinality = cardinality;
+ c->capacity = cardinality;
+ if(offset_headers != NULL) {
+ c->array = (uint16_t *) (start_of_buf + offset_headers[i]);
+ } else {
+ c->array = (uint16_t *) buf;
+ buf += cardinality * sizeof(uint16_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ }
+ }
+
+ return rb;
+}
+
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring {
#endif
+/* end file src/roaring.c */
+/* begin file src/roaring_array.c */
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
-/* flip specified bits */
-/* TODO: consider whether worthwhile to make an asm version */
-static uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load ^ (UINT64_C(1) << index);
- // todo: is a branch here all that bad?
- card +=
- (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1
- words[offset] = newload;
- list++;
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// Convention: [0,ra->size) all elements are initialized
+// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
+
+extern inline int32_t ra_get_size(const roaring_array_t *ra);
+extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
+
+extern inline container_t *ra_get_container_at_index(
+ const roaring_array_t *ra, uint16_t i,
+ uint8_t *typecode);
+
+extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
+ uint16_t i);
+
+extern inline void ra_replace_key_and_container_at_index(
+ roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode);
+
+extern inline void ra_set_container_at_index(
+ const roaring_array_t *ra, int32_t i,
+ container_t *c, uint8_t typecode);
+
+static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
+ //
+ // Note: not implemented using C's realloc(), because the memory layout is
+ // Struct-of-Arrays vs. Array-of-Structs:
+ // https://github.com/RoaringBitmap/CRoaring/issues/256
+
+ if ( new_capacity == 0 ) {
+ roaring_free(ra->containers);
+ ra->containers = NULL;
+ ra->keys = NULL;
+ ra->typecodes = NULL;
+ ra->allocation_size = 0;
+ return true;
}
- return card;
+ const size_t memoryneeded = new_capacity * (
+ sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
+ void *bigalloc = roaring_malloc(memoryneeded);
+ if (!bigalloc) return false;
+ void *oldbigalloc = ra->containers;
+ container_t **newcontainers = (container_t **)bigalloc;
+ uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
+ uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
+ assert((char *)(newtypecodes + new_capacity) ==
+ (char *)bigalloc + memoryneeded);
+ if(ra->size > 0) {
+ memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size);
+ memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
+ memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
+ }
+ ra->containers = newcontainers;
+ ra->keys = newkeys;
+ ra->typecodes = newtypecodes;
+ ra->allocation_size = new_capacity;
+ roaring_free(oldbigalloc);
+ return true;
}
-static void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- uint64_t offset, load, newload, pos, index;
- const uint16_t *end = list + length;
- while (list != end) {
- pos = *list;
- offset = pos >> 6;
- index = pos % 64;
- load = words[offset];
- newload = load ^ (UINT64_C(1) << index);
- words[offset] = newload;
- list++;
+bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
+ if (!new_ra) return false;
+ ra_init(new_ra);
+
+ if (cap > INT32_MAX) { return false; }
+
+ if(cap > 0) {
+ void *bigalloc = roaring_malloc(cap *
+ (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
+ if( bigalloc == NULL ) return false;
+ new_ra->containers = (container_t **)bigalloc;
+ new_ra->keys = (uint16_t *)(new_ra->containers + cap);
+ new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
+ // Narrowing is safe because of above check
+ new_ra->allocation_size = (int32_t)cap;
}
+ return true;
+}
+
+int ra_shrink_to_fit(roaring_array_t *ra) {
+ int savings = (ra->allocation_size - ra->size) *
+ (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
+ if (!realloc_array(ra, ra->size)) {
+ return 0;
+ }
+ ra->allocation_size = ra->size;
+ return savings;
+}
+
+void ra_init(roaring_array_t *new_ra) {
+ if (!new_ra) { return; }
+ new_ra->keys = NULL;
+ new_ra->containers = NULL;
+ new_ra->typecodes = NULL;
+
+ new_ra->allocation_size = 0;
+ new_ra->size = 0;
+ new_ra->flags = 0;
+}
+
+bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
+ bool copy_on_write) {
+ ra_clear_containers(dest); // we are going to overwrite them
+ if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size
+ dest->size = 0; // <--- This is important.
+ return true; // output was just cleared, so they match
+ }
+ if (dest->allocation_size < source->size) {
+ if (!realloc_array(dest, source->size)) {
+ return false;
+ }
+ }
+ dest->size = source->size;
+ memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
+ // we go through the containers, turning them into shared containers...
+ if (copy_on_write) {
+ for (int32_t i = 0; i < dest->size; ++i) {
+ source->containers[i] = get_copy_of_container(
+ source->containers[i], &source->typecodes[i], copy_on_write);
+ }
+ // we do a shallow copy to the other bitmap
+ memcpy(dest->containers, source->containers,
+ dest->size * sizeof(container_t *));
+ memcpy(dest->typecodes, source->typecodes,
+ dest->size * sizeof(uint8_t));
+ } else {
+ memcpy(dest->typecodes, source->typecodes,
+ dest->size * sizeof(uint8_t));
+ for (int32_t i = 0; i < dest->size; i++) {
+ dest->containers[i] =
+ container_clone(source->containers[i], source->typecodes[i]);
+ if (dest->containers[i] == NULL) {
+ for (int32_t j = 0; j < i; j++) {
+ container_free(dest->containers[j], dest->typecodes[j]);
+ }
+ ra_clear_without_containers(dest);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void ra_clear_containers(roaring_array_t *ra) {
+ for (int32_t i = 0; i < ra->size; ++i) {
+ container_free(ra->containers[i], ra->typecodes[i]);
+ }
+}
+
+void ra_reset(roaring_array_t *ra) {
+ ra_clear_containers(ra);
+ ra->size = 0;
+ ra_shrink_to_fit(ra);
+}
+
+void ra_clear_without_containers(roaring_array_t *ra) {
+ roaring_free(ra->containers); // keys and typecodes are allocated with containers
+ ra->size = 0;
+ ra->allocation_size = 0;
+ ra->containers = NULL;
+ ra->keys = NULL;
+ ra->typecodes = NULL;
+}
+
+void ra_clear(roaring_array_t *ra) {
+ ra_clear_containers(ra);
+ ra_clear_without_containers(ra);
+}
+
+bool extend_array(roaring_array_t *ra, int32_t k) {
+ int32_t desired_size = ra->size + k;
+ const int32_t max_containers = 65536;
+ assert(desired_size <= max_containers);
+ if (desired_size > ra->allocation_size) {
+ int32_t new_capacity =
+ (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
+ if (new_capacity > max_containers) {
+ new_capacity = max_containers;
+ }
+
+ return realloc_array(ra, new_capacity);
+ }
+ return true;
+}
+
+void ra_append(
+ roaring_array_t *ra, uint16_t key,
+ container_t *c, uint8_t typecode
+){
+ extend_array(ra, 1);
+ const int32_t pos = ra->size;
+
+ ra->keys[pos] = key;
+ ra->containers[pos] = c;
+ ra->typecodes[pos] = typecode;
+ ra->size++;
+}
+
+void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t index, bool copy_on_write) {
+ extend_array(ra, 1);
+ const int32_t pos = ra->size;
+
+ // old contents is junk not needing freeing
+ ra->keys[pos] = sa->keys[index];
+ // the shared container will be in two bitmaps
+ if (copy_on_write) {
+ sa->containers[index] = get_copy_of_container(
+ sa->containers[index], &sa->typecodes[index], copy_on_write);
+ ra->containers[pos] = sa->containers[index];
+ ra->typecodes[pos] = sa->typecodes[index];
+ } else {
+ ra->containers[pos] =
+ container_clone(sa->containers[index], sa->typecodes[index]);
+ ra->typecodes[pos] = sa->typecodes[index];
+ }
+ ra->size++;
+}
+
+void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t stopping_key, bool copy_on_write) {
+ for (int32_t i = 0; i < sa->size; ++i) {
+ if (sa->keys[i] >= stopping_key) break;
+ ra_append_copy(ra, sa, i, copy_on_write);
+ }
+}
+
+void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
+ int32_t start_index, int32_t end_index,
+ bool copy_on_write) {
+ extend_array(ra, end_index - start_index);
+ for (int32_t i = start_index; i < end_index; ++i) {
+ const int32_t pos = ra->size;
+ ra->keys[pos] = sa->keys[i];
+ if (copy_on_write) {
+ sa->containers[i] = get_copy_of_container(
+ sa->containers[i], &sa->typecodes[i], copy_on_write);
+ ra->containers[pos] = sa->containers[i];
+ ra->typecodes[pos] = sa->typecodes[i];
+ } else {
+ ra->containers[pos] =
+ container_clone(sa->containers[i], sa->typecodes[i]);
+ ra->typecodes[pos] = sa->typecodes[i];
+ }
+ ra->size++;
+ }
+}
+
+void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t before_start, bool copy_on_write) {
+ int start_location = ra_get_index(sa, before_start);
+ if (start_location >= 0)
+ ++start_location;
+ else
+ start_location = -start_location - 1;
+ ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write);
+}
+
+void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
+ int32_t start_index, int32_t end_index) {
+ extend_array(ra, end_index - start_index);
+
+ for (int32_t i = start_index; i < end_index; ++i) {
+ const int32_t pos = ra->size;
+
+ ra->keys[pos] = sa->keys[i];
+ ra->containers[pos] = sa->containers[i];
+ ra->typecodes[pos] = sa->typecodes[i];
+ ra->size++;
+ }
+}
+
+void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
+ int32_t start_index, int32_t end_index,
+ bool copy_on_write) {
+ extend_array(ra, end_index - start_index);
+
+ for (int32_t i = start_index; i < end_index; ++i) {
+ const int32_t pos = ra->size;
+ ra->keys[pos] = sa->keys[i];
+ if (copy_on_write) {
+ sa->containers[i] = get_copy_of_container(
+ sa->containers[i], &sa->typecodes[i], copy_on_write);
+ ra->containers[pos] = sa->containers[i];
+ ra->typecodes[pos] = sa->typecodes[i];
+ } else {
+ ra->containers[pos] =
+ container_clone(sa->containers[i], sa->typecodes[i]);
+ ra->typecodes[pos] = sa->typecodes[i];
+ }
+ ra->size++;
+ }
+}
+
+container_t *ra_get_container(
+ roaring_array_t *ra, uint16_t x, uint8_t *typecode
+){
+ int i = binarySearch(ra->keys, (int32_t)ra->size, x);
+ if (i < 0) return NULL;
+ *typecode = ra->typecodes[i];
+ return ra->containers[i];
+}
+
+extern inline container_t *ra_get_container_at_index(
+ const roaring_array_t *ra, uint16_t i,
+ uint8_t *typecode);
+
+extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra,
+ uint16_t i);
+
+extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
+
+extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
+ int32_t pos);
+
+// everything skipped over is freed
+int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
+ while (pos < ra->size && ra->keys[pos] < x) {
+ container_free(ra->containers[pos], ra->typecodes[pos]);
+ ++pos;
+ }
+ return pos;
+}
+
+void ra_insert_new_key_value_at(
+ roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode
+){
+ extend_array(ra, 1);
+ // May be an optimization opportunity with DIY memmove
+ memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
+ sizeof(uint16_t) * (ra->size - i));
+ memmove(&(ra->containers[i + 1]), &(ra->containers[i]),
+ sizeof(container_t *) * (ra->size - i));
+ memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]),
+ sizeof(uint8_t) * (ra->size - i));
+ ra->keys[i] = key;
+ ra->containers[i] = c;
+ ra->typecodes[i] = typecode;
+ ra->size++;
+}
+
+// note: the Java routine sets things to 0, enabling GC.
+// Java called it "resize" but it was always used to downsize.
+// Allowing upsize would break the conventions about
+// valid containers below ra->size.
+
+void ra_downsize(roaring_array_t *ra, int32_t new_length) {
+ assert(new_length <= ra->size);
+ ra->size = new_length;
+}
+
+void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
+ memmove(&(ra->containers[i]), &(ra->containers[i + 1]),
+ sizeof(container_t *) * (ra->size - i - 1));
+ memmove(&(ra->keys[i]), &(ra->keys[i + 1]),
+ sizeof(uint16_t) * (ra->size - i - 1));
+ memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]),
+ sizeof(uint8_t) * (ra->size - i - 1));
+ ra->size--;
+}
+
+void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
+ container_free(ra->containers[i], ra->typecodes[i]);
+ ra_remove_at_index(ra, i);
+}
+
+// used in inplace andNot only, to slide left the containers from
+// the mutated RoaringBitmap that are after the largest container of
+// the argument RoaringBitmap. In use it should be followed by a call to
+// downsize.
+//
+void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
+ uint32_t new_begin) {
+ assert(begin <= end);
+ assert(new_begin < begin);
+
+ const int range = end - begin;
+
+ // We ensure to previously have freed overwritten containers
+ // that are not copied elsewhere
+
+ memmove(&(ra->containers[new_begin]), &(ra->containers[begin]),
+ sizeof(container_t *) * range);
+ memmove(&(ra->keys[new_begin]), &(ra->keys[begin]),
+ sizeof(uint16_t) * range);
+ memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]),
+ sizeof(uint8_t) * range);
+}
+
+void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
+ if (distance > 0) {
+ extend_array(ra, distance);
+ }
+ int32_t srcpos = ra->size - count;
+ int32_t dstpos = srcpos + distance;
+ memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]),
+ sizeof(uint16_t) * count);
+ memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
+ sizeof(container_t *) * count);
+ memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
+ sizeof(uint8_t) * count);
+ ra->size += distance;
+}
+
+
+void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
+ size_t ctr = 0;
+ for (int32_t i = 0; i < ra->size; ++i) {
+ int num_added = container_to_uint32_array(
+ ans + ctr, ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+ ctr += num_added;
+ }
+}
+
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
+ size_t ctr = 0;
+ size_t dtr = 0;
+
+ size_t t_limit = 0;
+
+ bool first = false;
+ size_t first_skip = 0;
+
+ uint32_t *t_ans = NULL;
+ size_t cur_len = 0;
+
+ for (int i = 0; i < ra->size; ++i) {
+
+ const container_t *c = container_unwrap_shared(
+ ra->containers[i], &ra->typecodes[i]);
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE:
+ t_limit = (const_CAST_bitset(c))->cardinality;
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ t_limit = (const_CAST_array(c))->cardinality;
+ break;
+ case RUN_CONTAINER_TYPE:
+ t_limit = run_container_cardinality(const_CAST_run(c));
+ break;
+ }
+ if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
+ if (!first){
+ //first_skip = t_limit - (ctr + t_limit - offset);
+ first_skip = offset - ctr;
+ first = true;
+ t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * (first_skip + limit));
+ if(t_ans == NULL) {
+ return false;
+ }
+ memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit));
+ cur_len = first_skip + limit;
+ }
+ if (dtr + t_limit > cur_len){
+ uint32_t * append_ans = (uint32_t *)roaring_malloc(sizeof(*append_ans) * (cur_len + t_limit));
+ if(append_ans == NULL) {
+ if(t_ans != NULL) roaring_free(t_ans);
+ return false;
+ }
+ memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
+ cur_len = cur_len + t_limit;
+ memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
+ roaring_free(t_ans);
+ t_ans = append_ans;
+ }
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE:
+ container_to_uint32_array(
+ t_ans + dtr,
+ const_CAST_bitset(c), ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ container_to_uint32_array(
+ t_ans + dtr,
+ const_CAST_array(c), ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+ break;
+ case RUN_CONTAINER_TYPE:
+ container_to_uint32_array(
+ t_ans + dtr,
+ const_CAST_run(c), ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+ break;
+ }
+ dtr += t_limit;
+ }
+ ctr += t_limit;
+ if (dtr-first_skip >= limit) break;
+ }
+ if(t_ans != NULL) {
+ memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
+ roaring_free(t_ans); /* allocated with roaring_malloc above */
+ }
+ return true;
+}
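+
+/*
+ * Summary of ra_range_uint32_array above: containers are visited in key
+ * order and every container overlapping [offset, offset + limit) is fully
+ * decoded into the temporary buffer t_ans (grown on demand); the requested
+ * window is then copied into ans with a single memcpy at the end. The work
+ * is therefore proportional to the cardinality of the overlapping
+ * containers rather than to limit alone.
+ */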
+
+bool ra_has_run_container(const roaring_array_t *ra) {
+ for (int32_t k = 0; k < ra->size; ++k) {
+ if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
+ RUN_CONTAINER_TYPE)
+ return true;
+ }
+ return false;
+}
+
+uint32_t ra_portable_header_size(const roaring_array_t *ra) {
+ if (ra_has_run_container(ra)) {
+ if (ra->size <
+ NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets
+ return 4 + (ra->size + 7) / 8 + 4 * ra->size;
+ }
+ return 4 + (ra->size + 7) / 8 +
+ 8 * ra->size; // no separate 4-byte size field: the count is packed into the cookie
+ } else {
+ return 4 + 4 + 8 * ra->size;
+ }
+}
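+
+/*
+ * Worked example of the arithmetic above (illustrative only): with run
+ * containers and ra->size = 3 (< NO_OFFSET_THRESHOLD), the header is
+ * 4 bytes of cookie (the container count is packed into its upper 16 bits)
+ * + (3 + 7) / 8 = 1 byte of run-container bitset
+ * + 4 * 3 = 12 bytes of key/cardinality pairs, i.e. 17 bytes in total;
+ * the 4-bytes-per-container offset table is only added once ra->size
+ * reaches NO_OFFSET_THRESHOLD. Without run containers the header is always
+ * 4 + 4 + 8 * size: cookie, explicit 32-bit count, then the key/cardinality
+ * pairs and the 32-bit offsets.
+ */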
+
+size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
+ size_t count = ra_portable_header_size(ra);
+
+ for (int32_t k = 0; k < ra->size; ++k) {
+ count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
+ }
+ return count;
+}
+
+// This function is endian-sensitive.
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
+ char *initbuf = buf;
+ uint32_t startOffset = 0;
+ bool hasrun = ra_has_run_container(ra);
+ if (hasrun) {
+ uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
+ memcpy(buf, &cookie, sizeof(cookie));
+ buf += sizeof(cookie);
+ uint32_t s = (ra->size + 7) / 8;
+ uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1);
+ assert(bitmapOfRunContainers != NULL); // todo: handle
+ for (int32_t i = 0; i < ra->size; ++i) {
+ if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
+ RUN_CONTAINER_TYPE) {
+ bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
+ }
+ }
+ memcpy(buf, bitmapOfRunContainers, s);
+ buf += s;
+ roaring_free(bitmapOfRunContainers);
+ if (ra->size < NO_OFFSET_THRESHOLD) {
+ startOffset = 4 + 4 * ra->size + s;
+ } else {
+ startOffset = 4 + 8 * ra->size + s;
+ }
+ } else { // backwards compatibility
+ uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
+
+ memcpy(buf, &cookie, sizeof(cookie));
+ buf += sizeof(cookie);
+ memcpy(buf, &ra->size, sizeof(ra->size));
+ buf += sizeof(ra->size);
+
+ startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
+ }
+ for (int32_t k = 0; k < ra->size; ++k) {
+ memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
+ buf += sizeof(ra->keys[k]);
+ // get_cardinality returns a value in [1,1<<16], subtracting one
+ // we get [0,1<<16 - 1] which fits in 16 bits
+ uint16_t card = (uint16_t)(
+ container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
+ memcpy(buf, &card, sizeof(card));
+ buf += sizeof(card);
+ }
+ if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
+ // writing the containers offsets
+ for (int32_t k = 0; k < ra->size; k++) {
+ memcpy(buf, &startOffset, sizeof(startOffset));
+ buf += sizeof(startOffset);
+ startOffset =
+ startOffset +
+ container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
+ }
+ }
+ for (int32_t k = 0; k < ra->size; ++k) {
+ buf += container_write(ra->containers[k], ra->typecodes[k], buf);
+ }
+ return buf - initbuf;
+}
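+
+/*
+ * Layout written above (the same portable format that
+ * ra_portable_deserialize_size and ra_portable_deserialize read back):
+ *   1. a 32-bit cookie; when run containers are present the container count
+ *      minus one is packed into its upper 16 bits, otherwise a separate
+ *      32-bit count follows;
+ *   2. with run containers, a bitset of (size + 7) / 8 bytes flagging which
+ *      containers are run-encoded;
+ *   3. per container, a 16-bit key and a 16-bit (cardinality - 1);
+ *   4. a 32-bit byte offset per container, unless there are run containers
+ *      and fewer than NO_OFFSET_THRESHOLD of them;
+ *   5. the container payloads emitted by container_write().
+ */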
+
+// Quickly checks whether there is a serialized bitmap at the pointer,
+// not exceeding size "maxbytes" in bytes. This function does not allocate
+// memory dynamically.
+//
+// This function returns 0 if and only if no valid bitmap is found.
+// Otherwise, it returns how many bytes are occupied.
+//
+size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
+ size_t bytestotal = sizeof(int32_t);// for cookie
+ if(bytestotal > maxbytes) return 0;
+ uint32_t cookie;
+ memcpy(&cookie, buf, sizeof(int32_t));
+ buf += sizeof(uint32_t);
+ if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
+ cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
+ return 0;
+ }
+ int32_t size;
+
+ if ((cookie & 0xFFFF) == SERIAL_COOKIE)
+ size = (cookie >> 16) + 1;
+ else {
+ bytestotal += sizeof(int32_t);
+ if(bytestotal > maxbytes) return 0;
+ memcpy(&size, buf, sizeof(int32_t));
+ buf += sizeof(uint32_t);
+ }
+ if (size > (1<<16)) {
+ return 0; // logically impossible
+ }
+ char *bitmapOfRunContainers = NULL;
+ bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
+ if (hasrun) {
+ int32_t s = (size + 7) / 8;
+ bytestotal += s;
+ if(bytestotal > maxbytes) return 0;
+ bitmapOfRunContainers = (char *)buf;
+ buf += s;
+ }
+ bytestotal += size * 2 * sizeof(uint16_t);
+ if(bytestotal > maxbytes) return 0;
+ uint16_t *keyscards = (uint16_t *)buf;
+ buf += size * 2 * sizeof(uint16_t);
+ if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
+ // skipping the offsets
+ bytestotal += size * 4;
+ if(bytestotal > maxbytes) return 0;
+ buf += size * 4;
+ }
+ // Reading the containers
+ for (int32_t k = 0; k < size; ++k) {
+ uint16_t tmp;
+ memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
+ uint32_t thiscard = tmp + 1;
+ bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+ if (isbitmap) {
+ size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ bytestotal += containersize;
+ if(bytestotal > maxbytes) return 0;
+ buf += containersize;
+ } else if (isrun) {
+ bytestotal += sizeof(uint16_t);
+ if(bytestotal > maxbytes) return 0;
+ uint16_t n_runs;
+ memcpy(&n_runs, buf, sizeof(uint16_t));
+ buf += sizeof(uint16_t);
+ size_t containersize = n_runs * sizeof(rle16_t);
+ bytestotal += containersize;
+ if(bytestotal > maxbytes) return 0;
+ buf += containersize;
+ } else {
+ size_t containersize = thiscard * sizeof(uint16_t);
+ bytestotal += containersize;
+ if(bytestotal > maxbytes) return 0;
+ buf += containersize;
+ }
+ }
+ return bytestotal;
+}
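+
+/*
+ * Illustrative call pattern (a sketch, not upstream code; buf and maxbytes
+ * are assumed caller-provided): for untrusted input, the size check above
+ * is a cheap validation pass before the allocating deserializer below:
+ *
+ *   size_t used = ra_portable_deserialize_size(buf, maxbytes);
+ *   if (used == 0) return false;          // no valid bitmap found
+ *   size_t readbytes = 0;
+ *   roaring_array_t ra;
+ *   if (!ra_portable_deserialize(&ra, buf, used, &readbytes)) return false;
+ */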
+
+// This function populates answer from the content of buf (reading up to maxbytes bytes).
+// It returns false if a properly serialized bitmap cannot be found.
+// If it returns true, *readbytes is set to the number of bytes read, and *readbytes <= maxbytes.
+//
+// This function is endian-sensitive.
+bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
+ *readbytes = sizeof(int32_t);// for cookie
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
+ return false;
+ }
+ uint32_t cookie;
+ memcpy(&cookie, buf, sizeof(int32_t));
+ buf += sizeof(uint32_t);
+ if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
+ cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
+ fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
+ cookie);
+ return false;
+ }
+ int32_t size;
+
+ if ((cookie & 0xFFFF) == SERIAL_COOKIE)
+ size = (cookie >> 16) + 1;
+ else {
+ *readbytes += sizeof(int32_t);
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
+ return false;
+ }
+ memcpy(&size, buf, sizeof(int32_t));
+ buf += sizeof(uint32_t);
+ }
+ if (size < 0) {
+ fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
+ size);
+ return false; // logically impossible
+ }
+ if (size > (1<<16)) {
+ fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
+ size);
+ return false; // logically impossible
+ }
+ const char *bitmapOfRunContainers = NULL;
+ bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
+ if (hasrun) {
+ int32_t s = (size + 7) / 8;
+ *readbytes += s;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
+ return false;
+ }
+ bitmapOfRunContainers = buf;
+ buf += s;
+ }
+ uint16_t *keyscards = (uint16_t *)buf;
+
+ *readbytes += size * 2 * sizeof(uint16_t);
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
+ return false;
+ }
+ buf += size * 2 * sizeof(uint16_t);
+
+ bool is_ok = ra_init_with_capacity(answer, size);
+ if (!is_ok) {
+ fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
+ return false;
+ }
+
+ for (int32_t k = 0; k < size; ++k) {
+ uint16_t tmp;
+ memcpy(&tmp, keyscards + 2*k, sizeof(tmp));
+ answer->keys[k] = tmp;
+ }
+ if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
+ *readbytes += size * 4;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Ran out of bytes while reading offsets.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+
+ // skipping the offsets
+ buf += size * 4;
+ }
+ // Reading the containers
+ for (int32_t k = 0; k < size; ++k) {
+ uint16_t tmp;
+ memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
+ uint32_t thiscard = tmp + 1;
+ bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+ if (isbitmap) {
+ // we check that the read is allowed
+ size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+ bitset_container_t *c = bitset_container_create();
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++;
+ buf += bitset_container_read(thiscard, c, buf);
+ answer->containers[k] = c;
+ answer->typecodes[k] = BITSET_CONTAINER_TYPE;
+ } else if (isrun) {
+ // we check that the read is allowed
+ *readbytes += sizeof(uint16_t);
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ uint16_t n_runs;
+ memcpy(&n_runs, buf, sizeof(uint16_t));
+ size_t containersize = n_runs * sizeof(rle16_t);
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Running out of bytes while reading a run container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+
+ run_container_t *c = run_container_create();
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for a run container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++;
+ buf += run_container_read(thiscard, c, buf);
+ answer->containers[k] = c;
+ answer->typecodes[k] = RUN_CONTAINER_TYPE;
+ } else {
+ // we check that the read is allowed
+ size_t containersize = thiscard * sizeof(uint16_t);
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Running out of bytes while reading an array container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+ array_container_t *c =
+ array_container_create_given_capacity(thiscard);
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for an array container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++;
+ buf += array_container_read(thiscard, c, buf);
+ answer->containers[k] = c;
+ answer->typecodes[k] = ARRAY_CONTAINER_TYPE;
+ }
+ }
+ return true;
}
#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
#endif
-/* end file src/bitset_util.c */
+/* end file src/roaring_array.c */
+/* begin file src/roaring_priority_queue.c */
+
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" { namespace roaring { namespace api {
+#endif
+
+struct roaring_pq_element_s {
+ uint64_t size;
+ bool is_temporary;
+ roaring_bitmap_t *bitmap;
+};
+
+typedef struct roaring_pq_element_s roaring_pq_element_t;
+
+struct roaring_pq_s {
+ roaring_pq_element_t *elements;
+ uint64_t size;
+};
+
+typedef struct roaring_pq_s roaring_pq_t;
+
+static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) {
+ return t1->size < t2->size;
+}
+
+static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) {
+ uint64_t i = pq->size;
+ pq->elements[pq->size++] = *t;
+ while (i > 0) {
+ uint64_t p = (i - 1) >> 1;
+ roaring_pq_element_t ap = pq->elements[p];
+ if (!compare(t, &ap)) break;
+ pq->elements[i] = ap;
+ i = p;
+ }
+ pq->elements[i] = *t;
+}
+
+static void pq_free(roaring_pq_t *pq) {
+ roaring_free(pq);
+}
+
+static void percolate_down(roaring_pq_t *pq, uint32_t i) {
+ uint32_t size = (uint32_t)pq->size;
+ uint32_t hsize = size >> 1;
+ roaring_pq_element_t ai = pq->elements[i];
+ while (i < hsize) {
+ uint32_t l = (i << 1) + 1;
+ uint32_t r = l + 1;
+ roaring_pq_element_t bestc = pq->elements[l];
+ if (r < size) {
+ if (compare(pq->elements + r, &bestc)) {
+ l = r;
+ bestc = pq->elements[r];
+ }
+ }
+ if (!compare(&bestc, &ai)) {
+ break;
+ }
+ pq->elements[i] = bestc;
+ i = l;
+ }
+ pq->elements[i] = ai;
+}
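+
+/*
+ * The priority queue above is an implicit binary min-heap stored in a flat
+ * array: the children of element i live at 2*i + 1 and 2*i + 2, and its
+ * parent at (i - 1) >> 1. pq_add sifts a new element up, percolate_down
+ * restores the heap property from a given index, and compare() orders
+ * elements by serialized size, so pq_poll below always yields the smallest
+ * bitmap first.
+ */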
+
+static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) {
+ size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length;
+ roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size);
+ answer->elements = (roaring_pq_element_t *)(answer + 1);
+ answer->size = length;
+ for (uint32_t i = 0; i < length; i++) {
+ answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i];
+ answer->elements[i].is_temporary = false;
+ answer->elements[i].size =
+ roaring_bitmap_portable_size_in_bytes(arr[i]);
+ }
+ for (int32_t i = (length >> 1); i >= 0; i--) {
+ percolate_down(answer, i);
+ }
+ return answer;
+}
+
+static roaring_pq_element_t pq_poll(roaring_pq_t *pq) {
+ roaring_pq_element_t ans = *pq->elements;
+ if (pq->size > 1) {
+ pq->elements[0] = pq->elements[--pq->size];
+ percolate_down(pq, 0);
+ } else
+ --pq->size;
+ // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size;
+ return ans;
+}
+
+// this function consumes and frees the inputs
+static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
+ roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = ra_get_size(&x1->high_low_container),
+ length2 = ra_get_size(&x2->high_low_container);
+ if (0 == length1) {
+ roaring_bitmap_free(x1);
+ return x2;
+ }
+ if (0 == length2) {
+ roaring_bitmap_free(x2);
+ return x1;
+ }
+ uint32_t neededcap = length1 > length2 ? length2 : length1;
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ // todo: unsharing can be inefficient as it may create a clone where none
+ // is needed, but it has the benefit of being easy to reason about.
+
+ ra_unshare_container_at_index(&x1->high_low_container, pos1);
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ assert(type1 != SHARED_CONTAINER_TYPE);
+
+ ra_unshare_container_at_index(&x2->high_low_container, pos2);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ assert(type2 != SHARED_CONTAINER_TYPE);
+
+ container_t *c;
+
+ if ((type2 == BITSET_CONTAINER_TYPE) &&
+ (type1 != BITSET_CONTAINER_TYPE)
+ ){
+ c = container_lazy_ior(c2, type2, c1, type1, &result_type);
+ container_free(c1, type1);
+ if (c != c2) {
+ container_free(c2, type2);
+ }
+ } else {
+ c = container_lazy_ior(c1, type1, c2, type2, &result_type);
+ container_free(c2, type2);
+ if (c != c1) {
+ container_free(c1, type1);
+ }
+ }
+ // since we assume that the initial containers are non-empty,
+ // the result here can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_move_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2);
+ } else if (pos2 == length2) {
+ ra_append_move_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1);
+ }
+ ra_clear_without_containers(&x1->high_low_container);
+ ra_clear_without_containers(&x2->high_low_container);
+ roaring_free(x1);
+ roaring_free(x2);
+ return answer;
+}
+
+/**
+ * Compute the union of 'number' bitmaps using a heap. This can
+ * sometimes be faster than roaring_bitmap_or_many which uses
+ * a naive algorithm. Caller is responsible for freeing the
+ * result.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) {
+ return roaring_bitmap_create();
+ }
+ if (number == 1) {
+ return roaring_bitmap_copy(x[0]);
+ }
+ roaring_pq_t *pq = create_pq(x, number);
+ while (pq->size > 1) {
+ roaring_pq_element_t x1 = pq_poll(pq);
+ roaring_pq_element_t x2 = pq_poll(pq);
+
+ if (x1.is_temporary && x2.is_temporary) {
+ roaring_bitmap_t *newb =
+ lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap);
+ // should normally return a fresh new bitmap *except* that
+ // it can return x1.bitmap or x2.bitmap in degenerate cases
+ bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap));
+ uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
+ roaring_pq_element_t newelement = {
+ .size = bsize, .is_temporary = temporary, .bitmap = newb};
+ pq_add(pq, &newelement);
+ } else if (x2.is_temporary) {
+ roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false);
+ x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap);
+ pq_add(pq, &x2);
+ } else if (x1.is_temporary) {
+ roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false);
+ x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap);
+
+ pq_add(pq, &x1);
+ } else {
+ roaring_bitmap_t *newb =
+ roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false);
+ uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
+ roaring_pq_element_t newelement = {
+ .size = bsize, .is_temporary = true, .bitmap = newb};
+
+ pq_add(pq, &newelement);
+ }
+ }
+ roaring_pq_element_t X = pq_poll(pq);
+ roaring_bitmap_t *answer = X.bitmap;
+ roaring_bitmap_repair_after_lazy(answer);
+ pq_free(pq);
+ return answer;
+}
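+
+/*
+ * Minimal usage sketch for the function above (illustrative; a, b and c are
+ * assumed caller-owned bitmaps, error handling omitted):
+ *
+ *   const roaring_bitmap_t *inputs[] = { a, b, c };
+ *   roaring_bitmap_t *u = roaring_bitmap_or_many_heap(3, inputs);
+ *   // ... use the union u ...
+ *   roaring_bitmap_free(u);
+ *
+ * The inputs themselves are not modified; only the returned bitmap must be
+ * freed by the caller.
+ */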
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace api {
+#endif
+/* end file src/roaring_priority_queue.c */