aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Toni Uhlig <matzeton@googlemail.com> 2024-04-02 04:42:06 +0200
committer: Toni Uhlig <matzeton@googlemail.com> 2024-04-02 04:42:06 +0200
commit: f25e290be0b6bd40553c33bd0e3fb8ec3234ba91 (patch)
tree: cfc5f796cb9212feb77f0666a5111bd5a4e7910a
parent: 21572635ab15a993600c4efd1246ac0691968a75 (diff)
parent: 599cc0f4b83a96c247a92aaaa3f39acfec9e1dbe (diff)
Merge remote-tracking branch 'origin/dev' into fix/unused-params-and-fns
-rw-r--r--.github/workflows/build.yml2
-rw-r--r--configure.ac19
-rw-r--r--doc/protocols.rst9
-rw-r--r--example/ndpiReader.c6
-rw-r--r--fuzz/Makefile.am32
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/000b6da044c11d100ffecc1c58f236a0c243f2c9 (renamed from fuzz/corpus/fuzz_ds_bitmap64/000b6da044c11d100ffecc1c58f236a0c243f2c9)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9 (renamed from fuzz/corpus/fuzz_ds_bitmap64/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9)bin29659 -> 29659 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0b085fb6da7814c2e70c22ca0221090b8bf4002c (renamed from fuzz/corpus/fuzz_ds_bitmap64/0b085fb6da7814c2e70c22ca0221090b8bf4002c)bin30794 -> 30794 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0c78f9445a26753487132bc34b2fa5526b82357b (renamed from fuzz/corpus/fuzz_ds_bitmap64/0c78f9445a26753487132bc34b2fa5526b82357b)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53 (renamed from fuzz/corpus/fuzz_ds_bitmap64/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0ccd0d0397766b982826f7cf9115f97018a62466 (renamed from fuzz/corpus/fuzz_ds_bitmap64/0ccd0d0397766b982826f7cf9115f97018a62466)bin319 -> 319 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0db99f086dca27ead6b42db91de404061b623981 (renamed from fuzz/corpus/fuzz_ds_bitmap64/0db99f086dca27ead6b42db91de404061b623981)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec (renamed from fuzz/corpus/fuzz_ds_bitmap64/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/10c27c41df755669fc42b71d295eda528c02f5b6 (renamed from fuzz/corpus/fuzz_ds_bitmap64/10c27c41df755669fc42b71d295eda528c02f5b6)bin68 -> 68 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/10f0aadbed8649c5917a9d90a1483a30ad83793f (renamed from fuzz/corpus/fuzz_ds_bitmap64/10f0aadbed8649c5917a9d90a1483a30ad83793f)bin131 -> 131 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/2730ef1769e66315391286ab579e13efc1baa670 (renamed from fuzz/corpus/fuzz_ds_bitmap64/2730ef1769e66315391286ab579e13efc1baa670)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/27e24cb43045da83ad481dd8c6cd38de1bd359fc (renamed from fuzz/corpus/fuzz_ds_bitmap64/27e24cb43045da83ad481dd8c6cd38de1bd359fc)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/28005384ac31d00f0dce884ff065f66c76bc391b (renamed from fuzz/corpus/fuzz_ds_bitmap64/28005384ac31d00f0dce884ff065f66c76bc391b)bin29543 -> 29543 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/2ac3872be3ae0f578da110d5580b362374676ef9 (renamed from fuzz/corpus/fuzz_ds_bitmap64/2ac3872be3ae0f578da110d5580b362374676ef9)bin39 -> 39 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/2e540ffd5bd997876f0d84bb1d760602514437d5 (renamed from fuzz/corpus/fuzz_ds_bitmap64/2e540ffd5bd997876f0d84bb1d760602514437d5)bin152 -> 152 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/36f01f003e9a03de198ced6ad4ae919a5a9fd039 (renamed from fuzz/corpus/fuzz_ds_bitmap64/36f01f003e9a03de198ced6ad4ae919a5a9fd039)bin30313 -> 30313 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/375e36427a1552d8ff4d3c496296e4d2e52ae035 (renamed from fuzz/corpus/fuzz_ds_bitmap64/375e36427a1552d8ff4d3c496296e4d2e52ae035)bin1032 -> 1032 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/3af39359237047d1453f3a843348b1a53a7a480d (renamed from fuzz/corpus/fuzz_ds_bitmap64/3af39359237047d1453f3a843348b1a53a7a480d)bin743 -> 743 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/3c2fa67399fcc4c7de379137a631ac8278f5edc7 (renamed from fuzz/corpus/fuzz_ds_bitmap64/3c2fa67399fcc4c7de379137a631ac8278f5edc7)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/3de96d39842d29fa7c63589ecbfd2b652b329334 (renamed from fuzz/corpus/fuzz_ds_bitmap64/3de96d39842d29fa7c63589ecbfd2b652b329334)bin62992 -> 62992 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/448fdcde54b118a62f7a53c134a35ca447c0e762 (renamed from fuzz/corpus/fuzz_ds_bitmap64/448fdcde54b118a62f7a53c134a35ca447c0e762)bin60 -> 60 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/485cc776ea8d113e993d911e39ebbe089669f949 (renamed from fuzz/corpus/fuzz_ds_bitmap64/485cc776ea8d113e993d911e39ebbe089669f949)bin28 -> 28 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/486f2245c8e26f0f769ab53503d6d7bc5d15c97b (renamed from fuzz/corpus/fuzz_ds_bitmap64/486f2245c8e26f0f769ab53503d6d7bc5d15c97b)bin1030 -> 1030 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/4b7cc42c02089c817ff610eb65d149356d96062a (renamed from fuzz/corpus/fuzz_ds_bitmap64/4b7cc42c02089c817ff610eb65d149356d96062a)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/539e28d50616971ae880b07df55661945966626c (renamed from fuzz/corpus/fuzz_ds_bitmap64/539e28d50616971ae880b07df55661945966626c)bin4134 -> 4134 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba (renamed from fuzz/corpus/fuzz_ds_bitmap64/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba)bin80 -> 80 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7 (renamed from fuzz/corpus/fuzz_ds_bitmap64/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7)bin47 -> 47 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1 (renamed from fuzz/corpus/fuzz_ds_bitmap64/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/6e97172624f802febb1f68d357254e768810efc6 (renamed from fuzz/corpus/fuzz_ds_bitmap64/6e97172624f802febb1f68d357254e768810efc6)bin40 -> 40 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/6f103d053753b48191a50676b186154388fd90ce (renamed from fuzz/corpus/fuzz_ds_bitmap64/6f103d053753b48191a50676b186154388fd90ce)bin29872 -> 29872 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3 (renamed from fuzz/corpus/fuzz_ds_bitmap64/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3)bin29380 -> 29380 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/707c8ca3c49e1519254f90d69626a4cd351b45ca (renamed from fuzz/corpus/fuzz_ds_bitmap64/707c8ca3c49e1519254f90d69626a4cd351b45ca)bin29830 -> 29830 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/75c56111e67252f6b544aabf055ef7113f44891e (renamed from fuzz/corpus/fuzz_ds_bitmap64/75c56111e67252f6b544aabf055ef7113f44891e)bin32145 -> 32145 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/7b1b296a38fa941521b1ae5ada7f53fcb01616c5 (renamed from fuzz/corpus/fuzz_ds_bitmap64/7b1b296a38fa941521b1ae5ada7f53fcb01616c5)bin35 -> 35 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/8bb13e2f178a7da1580dbcf426885e6ac861a609 (renamed from fuzz/corpus/fuzz_ds_bitmap64/8bb13e2f178a7da1580dbcf426885e6ac861a609)bin29864 -> 29864 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/8dfde24023b6377ec411016db54890ad58370d9c (renamed from fuzz/corpus/fuzz_ds_bitmap64/8dfde24023b6377ec411016db54890ad58370d9c)bin33376 -> 33376 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/90f53e4946ded91eedeffe75747a9118d747d029 (renamed from fuzz/corpus/fuzz_ds_bitmap64/90f53e4946ded91eedeffe75747a9118d747d029)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/920da1a4cec774e0de02ebf54087099de7658cdb (renamed from fuzz/corpus/fuzz_ds_bitmap64/920da1a4cec774e0de02ebf54087099de7658cdb)bin39 -> 39 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/96996a3adf736a9044a04c5d8acd73e385b1bc6b (renamed from fuzz/corpus/fuzz_ds_bitmap64/96996a3adf736a9044a04c5d8acd73e385b1bc6b)bin1037 -> 1037 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef (renamed from fuzz/corpus/fuzz_ds_bitmap64/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/9f28a467c09155c799c880706286180fc50ee0a2 (renamed from fuzz/corpus/fuzz_ds_bitmap64/9f28a467c09155c799c880706286180fc50ee0a2)bin149 -> 149 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/a8fb1448425ca0cb6871ec51c67332eb2185159e (renamed from fuzz/corpus/fuzz_ds_bitmap64/a8fb1448425ca0cb6871ec51c67332eb2185159e)bin15 -> 15 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/ab870e2c87f40df57d2aeb9a035f6f55b3486261 (renamed from fuzz/corpus/fuzz_ds_bitmap64/ab870e2c87f40df57d2aeb9a035f6f55b3486261)bin5885 -> 5885 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/aca80cb5b3b214b3273623a383678e3e66eece2b (renamed from fuzz/corpus/fuzz_ds_bitmap64/aca80cb5b3b214b3273623a383678e3e66eece2b)bin29892 -> 29892 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/afa1100989a7a625fe7bbf39e09a296c53a5572c (renamed from fuzz/corpus/fuzz_ds_bitmap64/afa1100989a7a625fe7bbf39e09a296c53a5572c)bin13 -> 13 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/aff44f8008132c50796911f5f52f1e2307c2a858 (renamed from fuzz/corpus/fuzz_ds_bitmap64/aff44f8008132c50796911f5f52f1e2307c2a858)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/b51a60734da64be0e618bacbea2865a8a7dcd669 (renamed from fuzz/corpus/fuzz_ds_bitmap64/b51a60734da64be0e618bacbea2865a8a7dcd669)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9 (renamed from fuzz/corpus/fuzz_ds_bitmap64/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9)bin29549 -> 29549 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/b5d2b5145a6066fd0429e1236dd08fff307122ab (renamed from fuzz/corpus/fuzz_ds_bitmap64/b5d2b5145a6066fd0429e1236dd08fff307122ab)bin90 -> 90 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/b5f6e6294cc83f68f906cdc5195a45c30ce11121 (renamed from fuzz/corpus/fuzz_ds_bitmap64/b5f6e6294cc83f68f906cdc5195a45c30ce11121)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/b8dbebe13b9f719baae6afc13a3ee01846c33089 (renamed from fuzz/corpus/fuzz_ds_bitmap64/b8dbebe13b9f719baae6afc13a3ee01846c33089)bin54 -> 54 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/c03aa82eaa3d73636a44653b05f2aa83d465ae32 (renamed from fuzz/corpus/fuzz_ds_bitmap64/c03aa82eaa3d73636a44653b05f2aa83d465ae32)bin29544 -> 29544 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/c4fcead253b3a6ea0ebb48397751d2439b1f489f (renamed from fuzz/corpus/fuzz_ds_bitmap64/c4fcead253b3a6ea0ebb48397751d2439b1f489f)bin260 -> 260 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/c50e5229c745fc00ea982d7e31a21501362ae054 (renamed from fuzz/corpus/fuzz_ds_bitmap64/c50e5229c745fc00ea982d7e31a21501362ae054)bin48254 -> 48254 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/cedb4282204e8a6469f370e6ed6b082ba0017b54 (renamed from fuzz/corpus/fuzz_ds_bitmap64/cedb4282204e8a6469f370e6ed6b082ba0017b54)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba (renamed from fuzz/corpus/fuzz_ds_bitmap64/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba)bin29774 -> 29774 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/d58f4b573cd04c2ff3df7487ca8c5bc942335a30 (renamed from fuzz/corpus/fuzz_ds_bitmap64/d58f4b573cd04c2ff3df7487ca8c5bc942335a30)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/d9739412222ab242871c5292f12e5556e779a9c8 (renamed from fuzz/corpus/fuzz_ds_bitmap64/d9739412222ab242871c5292f12e5556e779a9c8)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/e20b515e8276f7bb16141e37b25d231266840ce3 (renamed from fuzz/corpus/fuzz_ds_bitmap64/e20b515e8276f7bb16141e37b25d231266840ce3)bin30 -> 30 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/e4e2d81d9c15ac2493c5014d584721366dd38af8 (renamed from fuzz/corpus/fuzz_ds_bitmap64/e4e2d81d9c15ac2493c5014d584721366dd38af8)bin38 -> 38 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/e54dbe399501a4027fdd3e2bcd547b4ba8699d00 (renamed from fuzz/corpus/fuzz_ds_bitmap64/e54dbe399501a4027fdd3e2bcd547b4ba8699d00)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/eefc0d805828e9bd300c2ce9370e32554a93d79a (renamed from fuzz/corpus/fuzz_ds_bitmap64/eefc0d805828e9bd300c2ce9370e32554a93d79a)bin29830 -> 29830 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/ef1f8773349d15d2c581642068b3b06206a2b43e (renamed from fuzz/corpus/fuzz_ds_bitmap64/ef1f8773349d15d2c581642068b3b06206a2b43e)bin29301 -> 29301 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a (renamed from fuzz/corpus/fuzz_ds_bitmap64/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a)bin4 -> 4 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/f1c09f190bf77d120a0e539412bbd9fa70af062f (renamed from fuzz/corpus/fuzz_ds_bitmap64/f1c09f190bf77d120a0e539412bbd9fa70af062f)bin271 -> 271 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/f4643e6de35a287e7ccc44126aabd147f86bff26 (renamed from fuzz/corpus/fuzz_ds_bitmap64/f4643e6de35a287e7ccc44126aabd147f86bff26)bin29826 -> 29826 bytes
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/fa5b31c6f43981ff6a304189345466790de5a10b (renamed from fuzz/corpus/fuzz_ds_bitmap64/fa5b31c6f43981ff6a304189345466790de5a10b)0
-rw-r--r--fuzz/corpus/fuzz_ds_bitmap64_fuse/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87 (renamed from fuzz/corpus/fuzz_ds_bitmap64/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87)bin48 -> 48 bytes
-rw-r--r--fuzz/fuzz_ds_bitmap64_fuse.cpp (renamed from fuzz/fuzz_ds_bitmap64.cpp)18
-rw-r--r--src/include/ndpi_api.h26
-rw-r--r--src/include/ndpi_define.h.in1
-rw-r--r--src/include/ndpi_private.h1
-rw-r--r--src/include/ndpi_protocol_ids.h1
-rw-r--r--src/include/ndpi_replace_printf.h (renamed from src/lib/ndpi_replace_printf.h)0
-rw-r--r--src/include/ndpi_typedefs.h6
-rw-r--r--src/lib/ndpi_bitmap.c138
-rw-r--r--src/lib/ndpi_bitmap64_fuse.c (renamed from src/lib/ndpi_bitmap64.c)48
-rw-r--r--src/lib/ndpi_domain_classify.c16
-rw-r--r--src/lib/ndpi_main.c7
-rw-r--r--src/lib/protocols/lol_wild_rift.c86
-rw-r--r--src/lib/protocols/tencent_games.c41
-rw-r--r--src/lib/third_party/include/roaring.h2031
-rw-r--r--src/lib/third_party/include/roaring_v2.h1143
-rw-r--r--src/lib/third_party/src/ahocorasick.c2
-rw-r--r--src/lib/third_party/src/roaring.c14473
-rw-r--r--src/lib/third_party/src/roaring_v2.c19747
-rw-r--r--tests/cfgs/caches_cfg/result/teams.pcap.out2
-rw-r--r--tests/cfgs/caches_global/result/lru_ipv6_caches.pcapng.out2
-rw-r--r--tests/cfgs/caches_global/result/teams.pcap.out2
-rw-r--r--tests/cfgs/caches_global/result/zoom_p2p.pcapng.out2
-rw-r--r--tests/cfgs/default/pcap/lol_wild_rift_udp.pcapbin0 -> 1704 bytes
-rw-r--r--tests/cfgs/default/pcap/tencent_games.pcapbin1002 -> 6154 bytes
-rw-r--r--tests/cfgs/default/result/1kxun.pcap.out2
-rw-r--r--tests/cfgs/default/result/4in4tunnel.pcap.out2
-rw-r--r--tests/cfgs/default/result/6in6tunnel.pcap.out2
-rw-r--r--tests/cfgs/default/result/EAQ.pcap.out2
-rw-r--r--tests/cfgs/default/result/FAX-Call-t38-CA-TDM-SIP-FB-1.pcap.out2
-rw-r--r--tests/cfgs/default/result/KakaoTalk_talk.pcap.out2
-rw-r--r--tests/cfgs/default/result/anyconnect-vpn.pcap.out2
-rw-r--r--tests/cfgs/default/result/collectd.pcap.out2
-rw-r--r--tests/cfgs/default/result/custom_rules_ipv6.pcapng.out2
-rw-r--r--tests/cfgs/default/result/custom_rules_same-ip_multiple_ports.pcapng.out4
-rw-r--r--tests/cfgs/default/result/dhcp-fuzz.pcapng.out2
-rw-r--r--tests/cfgs/default/result/discord.pcap.out2
-rw-r--r--tests/cfgs/default/result/discord_mid_flow.pcap.out2
-rw-r--r--tests/cfgs/default/result/dnscrypt-v1-and-resolver-pings.pcap.out2
-rw-r--r--tests/cfgs/default/result/dnscrypt-v2.pcap.out2
-rw-r--r--tests/cfgs/default/result/dnscrypt_skype_false_positive.pcapng.out2
-rw-r--r--tests/cfgs/default/result/epicgames.pcapng.out2
-rw-r--r--tests/cfgs/default/result/fuzz-2006-06-26-2594.pcap.out2
-rw-r--r--tests/cfgs/default/result/fuzz-2020-02-16-11740.pcap.out2
-rw-r--r--tests/cfgs/default/result/gnutella.pcap.out2
-rw-r--r--tests/cfgs/default/result/gtp_false_positive.pcapng.out2
-rw-r--r--tests/cfgs/default/result/h323.pcap.out2
-rw-r--r--tests/cfgs/default/result/http_ipv6.pcap.out2
-rw-r--r--tests/cfgs/default/result/imo.pcap.out2
-rw-r--r--tests/cfgs/default/result/instagram.pcap.out2
-rw-r--r--tests/cfgs/default/result/iphone.pcap.out2
-rw-r--r--tests/cfgs/default/result/ipv6_in_gtp.pcap.out2
-rw-r--r--tests/cfgs/default/result/kontiki.pcap.out2
-rw-r--r--tests/cfgs/default/result/linecall_falsepositve.pcap.out2
-rw-r--r--tests/cfgs/default/result/lol_wild_rift_udp.pcap.out32
-rw-r--r--tests/cfgs/default/result/lru_ipv6_caches.pcapng.out2
-rw-r--r--tests/cfgs/default/result/mullvad_wireguard.pcap.out2
-rw-r--r--tests/cfgs/default/result/mumble.pcapng.out2
-rw-r--r--tests/cfgs/default/result/nintendo.pcap.out2
-rw-r--r--tests/cfgs/default/result/openvpn-tlscrypt.pcap.out2
-rw-r--r--tests/cfgs/default/result/openvpn.pcap.out2
-rw-r--r--tests/cfgs/default/result/openvpn_nohmac.pcapng.out2
-rw-r--r--tests/cfgs/default/result/ossfuzz_seed_fake_traces_1.pcapng.out2
-rw-r--r--tests/cfgs/default/result/ossfuzz_seed_fake_traces_2.pcapng.out2
-rw-r--r--tests/cfgs/default/result/ossfuzz_seed_fake_traces_4.pcapng.out2
-rw-r--r--tests/cfgs/default/result/pps.pcap.out2
-rw-r--r--tests/cfgs/default/result/protonvpn.pcap.out2
-rw-r--r--tests/cfgs/default/result/quic.pcap.out2
-rw-r--r--tests/cfgs/default/result/quic_0RTT.pcap.out2
-rw-r--r--tests/cfgs/default/result/raknet.pcap.out2
-rw-r--r--tests/cfgs/default/result/rdp2.pcap.out2
-rw-r--r--tests/cfgs/default/result/rtp.pcapng.out2
-rw-r--r--tests/cfgs/default/result/rx.pcap.out2
-rw-r--r--tests/cfgs/default/result/sflow.pcap.out2
-rw-r--r--tests/cfgs/default/result/sip.pcap.out2
-rw-r--r--tests/cfgs/default/result/sip_hello.pcapng.out2
-rw-r--r--tests/cfgs/default/result/skinny.pcap.out2
-rw-r--r--tests/cfgs/default/result/softether.pcap.out2
-rw-r--r--tests/cfgs/default/result/starcraft_battle.pcap.out2
-rw-r--r--tests/cfgs/default/result/synscan.pcap.out4
-rw-r--r--tests/cfgs/default/result/teams.pcap.out2
-rw-r--r--tests/cfgs/default/result/teamspeak3.pcap.out2
-rw-r--r--tests/cfgs/default/result/teamviewer.pcap.out2
-rw-r--r--tests/cfgs/default/result/telegram.pcap.out2
-rw-r--r--tests/cfgs/default/result/tencent_games.pcap.out17
-rw-r--r--tests/cfgs/default/result/tftp.pcap.out2
-rw-r--r--tests/cfgs/default/result/toca-boca.pcap.out2
-rw-r--r--tests/cfgs/default/result/viber.pcap.out2
-rw-r--r--tests/cfgs/default/result/webex.pcap.out2
-rw-r--r--tests/cfgs/default/result/weibo.pcap.out2
-rw-r--r--tests/cfgs/default/result/wireguard.pcap.out2
-rw-r--r--tests/cfgs/default/result/zoom.pcap.out2
-rw-r--r--tests/cfgs/default/result/zoom_p2p.pcapng.out2
-rw-r--r--tests/cfgs/disable_protocols/result/dns_long_domainname.pcap.out2
-rw-r--r--tests/cfgs/disable_protocols/result/quic-mvfst-27.pcapng.out2
-rw-r--r--tests/cfgs/enable_payload_stat/result/1kxun.pcap.out2
-rw-r--r--tests/cfgs/flow_risk_lists_disable/result/protonvpn.pcap.out2
-rw-r--r--tests/cfgs/guessing_disable/result/webex.pcap.out2
-rw-r--r--tests/cfgs/ip_lists_disable/result/1kxun.pcap.out2
-rw-r--r--windows/nDPI.vcxproj3
-rw-r--r--windows/nDPI.vcxproj.filters1
173 files changed, 33598 insertions, 4464 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8323c25da..516a569ce 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -107,7 +107,7 @@ jobs:
msan: "--with-sanitizer"
nBPF: ""
lto_gold_linker: "--with-lto-and-gold-linker"
- - compiler: "clang-7" # "Oldest" clang easily available
+ - compiler: "clang-9" # "Oldest" clang easily available
os: ubuntu-20.04
arch: "x86_64"
gcrypt: ""
diff --git a/configure.ac b/configure.ac
index e23a42e81..0a09efd1c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -120,6 +120,25 @@ AC_CHECK_TOOL(AR, ar, [false])
AC_LANG_WERROR
+AC_MSG_CHECKING([whether roaring v3 works])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+ #if __has_include (<stdio.h>)
+ #endif
+ ]])],
+ [
+ AC_MSG_RESULT([yes])
+ ],
+ [
+ AC_MSG_RESULT([no])
+ AC_DEFINE_UNQUOTED(USE_ROARING_V2, "1", [Use CRoaring 2.1.x])
+ ])
+
+GCC_VERSION=`gcc --version | cut -d ' ' -f 3 | head -1|cut -d '.' -f 1`
+if [ "${GCC_VERSION}" -lt "7" ]; then
+ AC_DEFINE_UNQUOTED(USE_ROARING_V2, "1", [Use CRoaring 2.1.x])
+fi
+
+
NDPI_MAJOR=`echo "${PACKAGE_VERSION}" | cut -d . -f 1`
NDPI_MINOR=`echo "${PACKAGE_VERSION}" | cut -d . -f 2`
NDPI_PATCH=`echo "${PACKAGE_VERSION}" | cut -d . -f 3`
diff --git a/doc/protocols.rst b/doc/protocols.rst
index 354766432..3812dada2 100644
--- a/doc/protocols.rst
+++ b/doc/protocols.rst
@@ -697,3 +697,12 @@ References: `Protocol Specs: <https://www.etsi.org/deliver/etsi_ts/129200_129299
File Delivery over Unidirectional Transport.
References: `RFC <https://datatracker.ietf.org/doc/html/rfc6726>`_
+
+
+.. _Proto 407:
+
+`NDPI_PROTOCOL_LOLWILDRIFT`
+============================
+League of Legends: Wild Rift is a mobile MOBA game.
+
+References: `Main site <https://wildrift.leagueoflegends.com/>`_
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index cad0bae9c..029f784f2 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -5585,7 +5585,7 @@ void compressedBitmapUnitTest() {
size_t ser;
char *buf;
ndpi_bitmap_iterator *it;
- u_int32_t value;
+ u_int64_t value;
for(i=0; i<1000; i++) {
u_int32_t v = rand();
@@ -5601,12 +5601,12 @@ void compressedBitmapUnitTest() {
assert(ser > 0);
if(trace) printf("len: %u\n", (unsigned int)ser);
- b1 = ndpi_bitmap_deserialize(buf);
+ b1 = ndpi_bitmap_deserialize(buf, ser);
assert(b1);
assert((it = ndpi_bitmap_iterator_alloc(b)));
while(ndpi_bitmap_iterator_next(it, &value)) {
- if(trace) printf("%u ", value);
+ if(trace) printf("%lu ", (unsigned long)value);
}
if(trace) printf("\n");
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 34180a50f..b49a069d3 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am
@@ -2,7 +2,7 @@ bin_PROGRAMS = fuzz_process_packet fuzz_ndpi_reader fuzz_ndpi_reader_alloc_fail
#Alghoritms
bin_PROGRAMS += fuzz_alg_bins fuzz_alg_hll fuzz_alg_hw_rsi_outliers_da fuzz_alg_jitter fuzz_alg_ses_des fuzz_alg_crc32_md5 fuzz_alg_bytestream
#Data structures
-bin_PROGRAMS += fuzz_ds_patricia fuzz_ds_ahocorasick fuzz_ds_libcache fuzz_ds_tree fuzz_ds_ptree fuzz_ds_hash fuzz_ds_cmsketch fuzz_ds_bitmap64 fuzz_ds_domain_classify
+bin_PROGRAMS += fuzz_ds_patricia fuzz_ds_ahocorasick fuzz_ds_libcache fuzz_ds_tree fuzz_ds_ptree fuzz_ds_hash fuzz_ds_cmsketch fuzz_ds_bitmap64_fuse fuzz_ds_domain_classify
#Third party
bin_PROGRAMS += fuzz_libinjection fuzz_binaryfusefilter
#Internal crypto
@@ -328,20 +328,20 @@ fuzz_ds_cmsketch_LINK=$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXX) @NDPI_CFLAGS@ $(AM_CXXFLAGS) $(CXXFLAGS) \
$(fuzz_ds_cmsketch_LDFLAGS) @NDPI_LDFLAGS@ $(LDFLAGS) -o $@
-fuzz_ds_bitmap64_SOURCES = fuzz_ds_bitmap64.cpp fuzz_common_code.c
-fuzz_ds_bitmap64_CXXFLAGS = @NDPI_CFLAGS@ $(CXXFLAGS)
-fuzz_ds_bitmap64_CFLAGS = @NDPI_CFLAGS@ $(CXXFLAGS)
-fuzz_ds_bitmap64_LDADD = ../src/lib/libndpi.a $(ADDITIONAL_LIBS)
-fuzz_ds_bitmap64_LDFLAGS = $(LIBS)
+fuzz_ds_bitmap64_fuse_SOURCES = fuzz_ds_bitmap64_fuse.cpp fuzz_common_code.c
+fuzz_ds_bitmap64_fuse_CXXFLAGS = @NDPI_CFLAGS@ $(CXXFLAGS)
+fuzz_ds_bitmap64_fuse_CFLAGS = @NDPI_CFLAGS@ $(CXXFLAGS)
+fuzz_ds_bitmap64_fuse_LDADD = ../src/lib/libndpi.a $(ADDITIONAL_LIBS)
+fuzz_ds_bitmap64_fuse_LDFLAGS = $(LIBS)
if HAS_FUZZLDFLAGS
-fuzz_ds_bitmap64_CXXFLAGS += $(LIB_FUZZING_ENGINE)
-fuzz_ds_bitmap64_CFLAGS += $(LIB_FUZZING_ENGINE)
-fuzz_ds_bitmap64_LDFLAGS += $(LIB_FUZZING_ENGINE)
+fuzz_ds_bitmap64_fuse_CXXFLAGS += $(LIB_FUZZING_ENGINE)
+fuzz_ds_bitmap64_fuse_CFLAGS += $(LIB_FUZZING_ENGINE)
+fuzz_ds_bitmap64_fuse_LDFLAGS += $(LIB_FUZZING_ENGINE)
endif
# force usage of CXX for linker
-fuzz_ds_bitmap64_LINK=$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+fuzz_ds_bitmap64_fuse_LINK=$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXX) @NDPI_CFLAGS@ $(AM_CXXFLAGS) $(CXXFLAGS) \
- $(fuzz_ds_bitmap64_LDFLAGS) @NDPI_LDFLAGS@ $(LDFLAGS) -o $@
+ $(fuzz_ds_bitmap64_fuse_LDFLAGS) @NDPI_LDFLAGS@ $(LDFLAGS) -o $@
fuzz_ds_domain_classify_SOURCES = fuzz_ds_domain_classify.cpp fuzz_common_code.c
fuzz_ds_domain_classify_CXXFLAGS = @NDPI_CFLAGS@ $(CXXFLAGS)
@@ -729,15 +729,15 @@ files_corpus_fuzz_readerutils_parseprotolist := $(wildcard corpus/fuzz_readerut
fuzz_readerutils_parseprotolist_seed_corpus.zip: $(files_corpus_fuzz_readerutils_parseprotolist)
zip -j fuzz_readerutils_parseprotolist_seed_corpus.zip $(files_corpus_fuzz_readerutils_parseprotolist)
-files_corpus_fuzz_ds_bitmap64 := $(wildcard corpus/fuzz_ds_bitmap64/*)
-fuzz_ds_bitmap64_seed_corpus.zip: $(files_corpus_fuzz_ds_bitmap64)
- zip -j fuzz_ds_bitmap64_seed_corpus.zip $(files_corpus_fuzz_ds_bitmap64)
+files_corpus_fuzz_ds_bitmap64_fuse := $(wildcard corpus/fuzz_ds_bitmap64_fuse/*)
+fuzz_ds_bitmap64_fuse_seed_corpus.zip: $(files_corpus_fuzz_ds_bitmap64_fuse)
+ zip -j fuzz_ds_bitmap64_fuse_seed_corpus.zip $(files_corpus_fuzz_ds_bitmap64_fuse)
files_corpus_fuzz_ds_domain_classify := $(wildcard corpus/fuzz_ds_domain_classify/*)
fuzz_ds_domain_classify_seed_corpus.zip: $(files_corpus_fuzz_ds_domain_classify)
zip -j fuzz_ds_domain_classify_seed_corpus.zip $(files_corpus_fuzz_ds_domain_classify)
-corpus: fuzz_ndpi_reader_seed_corpus.zip fuzz_ndpi_reader_alloc_fail_seed_corpus.zip fuzz_ndpi_reader_payload_analyzer_seed_corpus.zip fuzz_quic_get_crypto_data_seed_corpus.zip fuzz_alg_ses_des_seed_corpus.zip fuzz_alg_bins_seed_corpus.zip fuzz_alg_hll_seed_corpus.zip fuzz_alg_jitter_seed_corpus.zip fuzz_ds_libcache_seed_corpus.zip fuzz_community_id_seed_corpus.zip fuzz_serialization_seed_corpus.zip fuzz_ds_ptree_seed_corpus.zip fuzz_alg_crc32_md5_seed_corpus.zip fuzz_alg_bytestream_seed_corpus.zip fuzz_libinjection_seed_corpus.zip fuzz_tls_certificate_seed_corpus.zip fuzz_filecfg_protocols_seed_corpus.zip fuzz_readerutils_workflow_seed_corpus.zip fuzz_readerutils_parseprotolist_seed_corpus.zip fuzz_ds_bitmap64_seed_corpus.zip fuzz_ds_domain_classify_seed_corpus.zip fuzz_filecfg_protocols_seed_corpus.zip fuzz_filecfg_categories_seed_corpus.zip fuzz_filecfg_config_seed_corpus.zip fuzz_filecfg_category_seed_corpus.zip fuzz_is_stun_udp_seed_corpus.zip fuzz_is_stun_tcp_seed_corpus.zip fuzz_filecfg_malicious_sha1_seed_corpus.zip fuzz_filecfg_malicious_ja3_seed_corpus.zip fuzz_filecfg_risk_domains_seed_corpus.zip
+corpus: fuzz_ndpi_reader_seed_corpus.zip fuzz_ndpi_reader_alloc_fail_seed_corpus.zip fuzz_ndpi_reader_payload_analyzer_seed_corpus.zip fuzz_quic_get_crypto_data_seed_corpus.zip fuzz_alg_ses_des_seed_corpus.zip fuzz_alg_bins_seed_corpus.zip fuzz_alg_hll_seed_corpus.zip fuzz_alg_jitter_seed_corpus.zip fuzz_ds_libcache_seed_corpus.zip fuzz_community_id_seed_corpus.zip fuzz_serialization_seed_corpus.zip fuzz_ds_ptree_seed_corpus.zip fuzz_alg_crc32_md5_seed_corpus.zip fuzz_alg_bytestream_seed_corpus.zip fuzz_libinjection_seed_corpus.zip fuzz_tls_certificate_seed_corpus.zip fuzz_filecfg_protocols_seed_corpus.zip fuzz_readerutils_workflow_seed_corpus.zip fuzz_readerutils_parseprotolist_seed_corpus.zip fuzz_ds_bitmap64_fuse_seed_corpus.zip fuzz_ds_domain_classify_seed_corpus.zip fuzz_filecfg_protocols_seed_corpus.zip fuzz_filecfg_categories_seed_corpus.zip fuzz_filecfg_config_seed_corpus.zip fuzz_filecfg_category_seed_corpus.zip fuzz_is_stun_udp_seed_corpus.zip fuzz_is_stun_tcp_seed_corpus.zip fuzz_filecfg_malicious_sha1_seed_corpus.zip fuzz_filecfg_malicious_ja3_seed_corpus.zip fuzz_filecfg_risk_domains_seed_corpus.zip
cp corpus/fuzz_*seed_corpus.zip .
#Create dictionaries exactly as expected by oss-fuzz.
@@ -787,7 +787,7 @@ distdir:
-o -path './corpus/fuzz_alg_crc32_md5/*' \
-o -path './corpus/fuzz_alg_bytestream/*' \
-o -path './corpus/fuzz_ds_libcache/*' \
- -o -path './corpus/fuzz_ds_bitmap64/*' \
+ -o -path './corpus/fuzz_ds_bitmap64_fuse/*' \
-o -path './corpus/fuzz_ds_domain_classify/*' \
-o -path './corpus/fuzz_ds_ptree/*' | xargs -I'{}' cp -r '{}' '$(distdir)/{}'
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/000b6da044c11d100ffecc1c58f236a0c243f2c9 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/000b6da044c11d100ffecc1c58f236a0c243f2c9
index 6e858d125..6e858d125 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/000b6da044c11d100ffecc1c58f236a0c243f2c9
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/000b6da044c11d100ffecc1c58f236a0c243f2c9
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9
index 2758a2ba4..2758a2ba4 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/03191cdd9d39fd9e9481819abe8096d3cd8dcaa9
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0b085fb6da7814c2e70c22ca0221090b8bf4002c b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0b085fb6da7814c2e70c22ca0221090b8bf4002c
index 1ade82114..1ade82114 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0b085fb6da7814c2e70c22ca0221090b8bf4002c
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0b085fb6da7814c2e70c22ca0221090b8bf4002c
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0c78f9445a26753487132bc34b2fa5526b82357b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0c78f9445a26753487132bc34b2fa5526b82357b
index 1aa2657a9..1aa2657a9 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0c78f9445a26753487132bc34b2fa5526b82357b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0c78f9445a26753487132bc34b2fa5526b82357b
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53
index 33453b4b8..33453b4b8 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0c9acbe4dba896e14dbaaf1063b4b445d2fd8f53
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0ccd0d0397766b982826f7cf9115f97018a62466 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0ccd0d0397766b982826f7cf9115f97018a62466
index 83dd483d2..83dd483d2 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0ccd0d0397766b982826f7cf9115f97018a62466
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0ccd0d0397766b982826f7cf9115f97018a62466
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0db99f086dca27ead6b42db91de404061b623981 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0db99f086dca27ead6b42db91de404061b623981
index cdb40da90..cdb40da90 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0db99f086dca27ead6b42db91de404061b623981
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0db99f086dca27ead6b42db91de404061b623981
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec
index e21f4ad0f..e21f4ad0f 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/0eff7f526d9e547a3fd3fc0bfbaefa1e686ec9ec
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/10c27c41df755669fc42b71d295eda528c02f5b6 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/10c27c41df755669fc42b71d295eda528c02f5b6
index d40472301..d40472301 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/10c27c41df755669fc42b71d295eda528c02f5b6
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/10c27c41df755669fc42b71d295eda528c02f5b6
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/10f0aadbed8649c5917a9d90a1483a30ad83793f b/fuzz/corpus/fuzz_ds_bitmap64_fuse/10f0aadbed8649c5917a9d90a1483a30ad83793f
index 552d70d2a..552d70d2a 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/10f0aadbed8649c5917a9d90a1483a30ad83793f
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/10f0aadbed8649c5917a9d90a1483a30ad83793f
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/2730ef1769e66315391286ab579e13efc1baa670 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2730ef1769e66315391286ab579e13efc1baa670
index efd0701f9..efd0701f9 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/2730ef1769e66315391286ab579e13efc1baa670
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2730ef1769e66315391286ab579e13efc1baa670
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/27e24cb43045da83ad481dd8c6cd38de1bd359fc b/fuzz/corpus/fuzz_ds_bitmap64_fuse/27e24cb43045da83ad481dd8c6cd38de1bd359fc
index b0fda9f66..b0fda9f66 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/27e24cb43045da83ad481dd8c6cd38de1bd359fc
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/27e24cb43045da83ad481dd8c6cd38de1bd359fc
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/28005384ac31d00f0dce884ff065f66c76bc391b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/28005384ac31d00f0dce884ff065f66c76bc391b
index 7590bb32c..7590bb32c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/28005384ac31d00f0dce884ff065f66c76bc391b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/28005384ac31d00f0dce884ff065f66c76bc391b
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/2ac3872be3ae0f578da110d5580b362374676ef9 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2ac3872be3ae0f578da110d5580b362374676ef9
index beb9fe490..beb9fe490 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/2ac3872be3ae0f578da110d5580b362374676ef9
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2ac3872be3ae0f578da110d5580b362374676ef9
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/2e540ffd5bd997876f0d84bb1d760602514437d5 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2e540ffd5bd997876f0d84bb1d760602514437d5
index 1dd670bae..1dd670bae 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/2e540ffd5bd997876f0d84bb1d760602514437d5
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/2e540ffd5bd997876f0d84bb1d760602514437d5
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/36f01f003e9a03de198ced6ad4ae919a5a9fd039 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/36f01f003e9a03de198ced6ad4ae919a5a9fd039
index 3468266b4..3468266b4 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/36f01f003e9a03de198ced6ad4ae919a5a9fd039
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/36f01f003e9a03de198ced6ad4ae919a5a9fd039
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/375e36427a1552d8ff4d3c496296e4d2e52ae035 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/375e36427a1552d8ff4d3c496296e4d2e52ae035
index fa5bcc95d..fa5bcc95d 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/375e36427a1552d8ff4d3c496296e4d2e52ae035
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/375e36427a1552d8ff4d3c496296e4d2e52ae035
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/3af39359237047d1453f3a843348b1a53a7a480d b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3af39359237047d1453f3a843348b1a53a7a480d
index 2e5040f8c..2e5040f8c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/3af39359237047d1453f3a843348b1a53a7a480d
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3af39359237047d1453f3a843348b1a53a7a480d
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/3c2fa67399fcc4c7de379137a631ac8278f5edc7 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3c2fa67399fcc4c7de379137a631ac8278f5edc7
index c5c3c3c2b..c5c3c3c2b 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/3c2fa67399fcc4c7de379137a631ac8278f5edc7
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3c2fa67399fcc4c7de379137a631ac8278f5edc7
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/3de96d39842d29fa7c63589ecbfd2b652b329334 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3de96d39842d29fa7c63589ecbfd2b652b329334
index 937953c59..937953c59 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/3de96d39842d29fa7c63589ecbfd2b652b329334
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/3de96d39842d29fa7c63589ecbfd2b652b329334
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/448fdcde54b118a62f7a53c134a35ca447c0e762 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/448fdcde54b118a62f7a53c134a35ca447c0e762
index 4b77a8573..4b77a8573 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/448fdcde54b118a62f7a53c134a35ca447c0e762
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/448fdcde54b118a62f7a53c134a35ca447c0e762
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/485cc776ea8d113e993d911e39ebbe089669f949 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/485cc776ea8d113e993d911e39ebbe089669f949
index b9d7cf725..b9d7cf725 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/485cc776ea8d113e993d911e39ebbe089669f949
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/485cc776ea8d113e993d911e39ebbe089669f949
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/486f2245c8e26f0f769ab53503d6d7bc5d15c97b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/486f2245c8e26f0f769ab53503d6d7bc5d15c97b
index 82e64f77a..82e64f77a 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/486f2245c8e26f0f769ab53503d6d7bc5d15c97b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/486f2245c8e26f0f769ab53503d6d7bc5d15c97b
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/4b7cc42c02089c817ff610eb65d149356d96062a b/fuzz/corpus/fuzz_ds_bitmap64_fuse/4b7cc42c02089c817ff610eb65d149356d96062a
index bbc5ce21e..bbc5ce21e 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/4b7cc42c02089c817ff610eb65d149356d96062a
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/4b7cc42c02089c817ff610eb65d149356d96062a
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/539e28d50616971ae880b07df55661945966626c b/fuzz/corpus/fuzz_ds_bitmap64_fuse/539e28d50616971ae880b07df55661945966626c
index 1b5878df0..1b5878df0 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/539e28d50616971ae880b07df55661945966626c
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/539e28d50616971ae880b07df55661945966626c
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba b/fuzz/corpus/fuzz_ds_bitmap64_fuse/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba
index b7b5c063a..b7b5c063a 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/5e3bfa52c5d6a5e3cb2288fa9c58dd09b546f2ba
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7
index 55b3b9311..55b3b9311 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/60c5f54695042fd2d6e0b97bddf3e55f8c9807b7
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1
index a1a97a72b..a1a97a72b 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6886bfb6a9cb9398e4dfbf0fa4fb909148d442a1
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/6e97172624f802febb1f68d357254e768810efc6 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6e97172624f802febb1f68d357254e768810efc6
index 35b9f61ac..35b9f61ac 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/6e97172624f802febb1f68d357254e768810efc6
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6e97172624f802febb1f68d357254e768810efc6
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/6f103d053753b48191a50676b186154388fd90ce b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6f103d053753b48191a50676b186154388fd90ce
index 7ca663399..7ca663399 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/6f103d053753b48191a50676b186154388fd90ce
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6f103d053753b48191a50676b186154388fd90ce
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3
index 91aab7e5b..91aab7e5b 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/6fb915c4967d2d8b03c3cbc6e65aa247ea94dde3
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/707c8ca3c49e1519254f90d69626a4cd351b45ca b/fuzz/corpus/fuzz_ds_bitmap64_fuse/707c8ca3c49e1519254f90d69626a4cd351b45ca
index 17fe2b432..17fe2b432 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/707c8ca3c49e1519254f90d69626a4cd351b45ca
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/707c8ca3c49e1519254f90d69626a4cd351b45ca
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/75c56111e67252f6b544aabf055ef7113f44891e b/fuzz/corpus/fuzz_ds_bitmap64_fuse/75c56111e67252f6b544aabf055ef7113f44891e
index 739ceed05..739ceed05 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/75c56111e67252f6b544aabf055ef7113f44891e
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/75c56111e67252f6b544aabf055ef7113f44891e
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/7b1b296a38fa941521b1ae5ada7f53fcb01616c5 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/7b1b296a38fa941521b1ae5ada7f53fcb01616c5
index 72719d434..72719d434 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/7b1b296a38fa941521b1ae5ada7f53fcb01616c5
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/7b1b296a38fa941521b1ae5ada7f53fcb01616c5
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/8bb13e2f178a7da1580dbcf426885e6ac861a609 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/8bb13e2f178a7da1580dbcf426885e6ac861a609
index 15d2ec270..15d2ec270 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/8bb13e2f178a7da1580dbcf426885e6ac861a609
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/8bb13e2f178a7da1580dbcf426885e6ac861a609
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/8dfde24023b6377ec411016db54890ad58370d9c b/fuzz/corpus/fuzz_ds_bitmap64_fuse/8dfde24023b6377ec411016db54890ad58370d9c
index 1b4e0d294..1b4e0d294 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/8dfde24023b6377ec411016db54890ad58370d9c
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/8dfde24023b6377ec411016db54890ad58370d9c
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/90f53e4946ded91eedeffe75747a9118d747d029 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/90f53e4946ded91eedeffe75747a9118d747d029
index 942f79517..942f79517 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/90f53e4946ded91eedeffe75747a9118d747d029
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/90f53e4946ded91eedeffe75747a9118d747d029
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/920da1a4cec774e0de02ebf54087099de7658cdb b/fuzz/corpus/fuzz_ds_bitmap64_fuse/920da1a4cec774e0de02ebf54087099de7658cdb
index c31d9caea..c31d9caea 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/920da1a4cec774e0de02ebf54087099de7658cdb
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/920da1a4cec774e0de02ebf54087099de7658cdb
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/96996a3adf736a9044a04c5d8acd73e385b1bc6b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/96996a3adf736a9044a04c5d8acd73e385b1bc6b
index 1f39c2362..1f39c2362 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/96996a3adf736a9044a04c5d8acd73e385b1bc6b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/96996a3adf736a9044a04c5d8acd73e385b1bc6b
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef b/fuzz/corpus/fuzz_ds_bitmap64_fuse/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef
index 2c734f55c..2c734f55c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/9e55233a97010a5a3f2dfad360e5c8a3e2d9b8ef
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/9f28a467c09155c799c880706286180fc50ee0a2 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/9f28a467c09155c799c880706286180fc50ee0a2
index d70f7fe0e..d70f7fe0e 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/9f28a467c09155c799c880706286180fc50ee0a2
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/9f28a467c09155c799c880706286180fc50ee0a2
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/a8fb1448425ca0cb6871ec51c67332eb2185159e b/fuzz/corpus/fuzz_ds_bitmap64_fuse/a8fb1448425ca0cb6871ec51c67332eb2185159e
index c745c687e..c745c687e 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/a8fb1448425ca0cb6871ec51c67332eb2185159e
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/a8fb1448425ca0cb6871ec51c67332eb2185159e
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/ab870e2c87f40df57d2aeb9a035f6f55b3486261 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/ab870e2c87f40df57d2aeb9a035f6f55b3486261
index 55a73792d..55a73792d 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/ab870e2c87f40df57d2aeb9a035f6f55b3486261
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/ab870e2c87f40df57d2aeb9a035f6f55b3486261
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/aca80cb5b3b214b3273623a383678e3e66eece2b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/aca80cb5b3b214b3273623a383678e3e66eece2b
index 246262c24..246262c24 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/aca80cb5b3b214b3273623a383678e3e66eece2b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/aca80cb5b3b214b3273623a383678e3e66eece2b
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/afa1100989a7a625fe7bbf39e09a296c53a5572c b/fuzz/corpus/fuzz_ds_bitmap64_fuse/afa1100989a7a625fe7bbf39e09a296c53a5572c
index 55854c0cf..55854c0cf 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/afa1100989a7a625fe7bbf39e09a296c53a5572c
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/afa1100989a7a625fe7bbf39e09a296c53a5572c
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/aff44f8008132c50796911f5f52f1e2307c2a858 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/aff44f8008132c50796911f5f52f1e2307c2a858
index 6c769fd18..6c769fd18 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/aff44f8008132c50796911f5f52f1e2307c2a858
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/aff44f8008132c50796911f5f52f1e2307c2a858
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/b51a60734da64be0e618bacbea2865a8a7dcd669 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b51a60734da64be0e618bacbea2865a8a7dcd669
index 2f94675b7..2f94675b7 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/b51a60734da64be0e618bacbea2865a8a7dcd669
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b51a60734da64be0e618bacbea2865a8a7dcd669
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9
index f1b41d102..f1b41d102 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5c0fdf0969f9ad91e127670d76df252e6e6b1d9
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/b5d2b5145a6066fd0429e1236dd08fff307122ab b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5d2b5145a6066fd0429e1236dd08fff307122ab
index fea6e080c..fea6e080c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/b5d2b5145a6066fd0429e1236dd08fff307122ab
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5d2b5145a6066fd0429e1236dd08fff307122ab
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/b5f6e6294cc83f68f906cdc5195a45c30ce11121 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5f6e6294cc83f68f906cdc5195a45c30ce11121
index fc66b353c..fc66b353c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/b5f6e6294cc83f68f906cdc5195a45c30ce11121
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b5f6e6294cc83f68f906cdc5195a45c30ce11121
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/b8dbebe13b9f719baae6afc13a3ee01846c33089 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b8dbebe13b9f719baae6afc13a3ee01846c33089
index 13aadd727..13aadd727 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/b8dbebe13b9f719baae6afc13a3ee01846c33089
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/b8dbebe13b9f719baae6afc13a3ee01846c33089
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/c03aa82eaa3d73636a44653b05f2aa83d465ae32 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c03aa82eaa3d73636a44653b05f2aa83d465ae32
index 9d44a9f94..9d44a9f94 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/c03aa82eaa3d73636a44653b05f2aa83d465ae32
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c03aa82eaa3d73636a44653b05f2aa83d465ae32
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/c4fcead253b3a6ea0ebb48397751d2439b1f489f b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c4fcead253b3a6ea0ebb48397751d2439b1f489f
index dbeb7ca0c..dbeb7ca0c 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/c4fcead253b3a6ea0ebb48397751d2439b1f489f
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c4fcead253b3a6ea0ebb48397751d2439b1f489f
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/c50e5229c745fc00ea982d7e31a21501362ae054 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c50e5229c745fc00ea982d7e31a21501362ae054
index 84cb14d22..84cb14d22 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/c50e5229c745fc00ea982d7e31a21501362ae054
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/c50e5229c745fc00ea982d7e31a21501362ae054
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/cedb4282204e8a6469f370e6ed6b082ba0017b54 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/cedb4282204e8a6469f370e6ed6b082ba0017b54
index 17f2335a0..17f2335a0 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/cedb4282204e8a6469f370e6ed6b082ba0017b54
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/cedb4282204e8a6469f370e6ed6b082ba0017b54
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba
index f97878f51..f97878f51 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d39a8fafd0e4a957003dcbc8ca23e48e9640aaba
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/d58f4b573cd04c2ff3df7487ca8c5bc942335a30 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d58f4b573cd04c2ff3df7487ca8c5bc942335a30
index f5d4f9dff..f5d4f9dff 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/d58f4b573cd04c2ff3df7487ca8c5bc942335a30
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d58f4b573cd04c2ff3df7487ca8c5bc942335a30
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/d9739412222ab242871c5292f12e5556e779a9c8 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d9739412222ab242871c5292f12e5556e779a9c8
index 7a8739c9a..7a8739c9a 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/d9739412222ab242871c5292f12e5556e779a9c8
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/d9739412222ab242871c5292f12e5556e779a9c8
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/e20b515e8276f7bb16141e37b25d231266840ce3 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e20b515e8276f7bb16141e37b25d231266840ce3
index 19600110d..19600110d 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/e20b515e8276f7bb16141e37b25d231266840ce3
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e20b515e8276f7bb16141e37b25d231266840ce3
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/e4e2d81d9c15ac2493c5014d584721366dd38af8 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e4e2d81d9c15ac2493c5014d584721366dd38af8
index f305e39c3..f305e39c3 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/e4e2d81d9c15ac2493c5014d584721366dd38af8
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e4e2d81d9c15ac2493c5014d584721366dd38af8
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/e54dbe399501a4027fdd3e2bcd547b4ba8699d00 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e54dbe399501a4027fdd3e2bcd547b4ba8699d00
index 672a551ff..672a551ff 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/e54dbe399501a4027fdd3e2bcd547b4ba8699d00
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/e54dbe399501a4027fdd3e2bcd547b4ba8699d00
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/eefc0d805828e9bd300c2ce9370e32554a93d79a b/fuzz/corpus/fuzz_ds_bitmap64_fuse/eefc0d805828e9bd300c2ce9370e32554a93d79a
index 8faa665d5..8faa665d5 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/eefc0d805828e9bd300c2ce9370e32554a93d79a
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/eefc0d805828e9bd300c2ce9370e32554a93d79a
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/ef1f8773349d15d2c581642068b3b06206a2b43e b/fuzz/corpus/fuzz_ds_bitmap64_fuse/ef1f8773349d15d2c581642068b3b06206a2b43e
index ad35998e3..ad35998e3 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/ef1f8773349d15d2c581642068b3b06206a2b43e
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/ef1f8773349d15d2c581642068b3b06206a2b43e
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a
index ded2df2a5..ded2df2a5 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f0c957104bb1b80c9d125d9c8cbb3f06fbf2ab1a
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/f1c09f190bf77d120a0e539412bbd9fa70af062f b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f1c09f190bf77d120a0e539412bbd9fa70af062f
index ef42a3e51..ef42a3e51 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/f1c09f190bf77d120a0e539412bbd9fa70af062f
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f1c09f190bf77d120a0e539412bbd9fa70af062f
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/f4643e6de35a287e7ccc44126aabd147f86bff26 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f4643e6de35a287e7ccc44126aabd147f86bff26
index 46351556b..46351556b 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/f4643e6de35a287e7ccc44126aabd147f86bff26
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/f4643e6de35a287e7ccc44126aabd147f86bff26
Binary files differ
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/fa5b31c6f43981ff6a304189345466790de5a10b b/fuzz/corpus/fuzz_ds_bitmap64_fuse/fa5b31c6f43981ff6a304189345466790de5a10b
index 8fb85d223..8fb85d223 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/fa5b31c6f43981ff6a304189345466790de5a10b
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/fa5b31c6f43981ff6a304189345466790de5a10b
diff --git a/fuzz/corpus/fuzz_ds_bitmap64/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87 b/fuzz/corpus/fuzz_ds_bitmap64_fuse/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87
index 20d96d851..20d96d851 100644
--- a/fuzz/corpus/fuzz_ds_bitmap64/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87
+++ b/fuzz/corpus/fuzz_ds_bitmap64_fuse/fb8f2cb93bb8e3215593ea1395a6ec62c627ed87
Binary files differ
diff --git a/fuzz/fuzz_ds_bitmap64.cpp b/fuzz/fuzz_ds_bitmap64_fuse.cpp
index 18a3fcb34..381c16b5c 100644
--- a/fuzz/fuzz_ds_bitmap64.cpp
+++ b/fuzz/fuzz_ds_bitmap64_fuse.cpp
@@ -7,23 +7,23 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
FuzzedDataProvider fuzzed_data(data, size);
u_int16_t i, num_iteration, is_added = 0;
- ndpi_bitmap64 *b;
+ ndpi_bitmap64_fuse *b;
bool rc;
u_int64_t value, value_added;
/* To allow memory allocation failures */
fuzz_set_alloc_callbacks_and_seed(size);
- b = ndpi_bitmap64_alloc();
+ b = ndpi_bitmap64_fuse_alloc();
if(fuzzed_data.ConsumeBool())
- ndpi_bitmap64_compress(b);
+ ndpi_bitmap64_fuse_compress(b);
num_iteration = fuzzed_data.ConsumeIntegral<u_int16_t>();
for (i = 0; i < num_iteration; i++) {
value = fuzzed_data.ConsumeIntegral<u_int64_t>();
- rc = ndpi_bitmap64_set(b, value);
+ rc = ndpi_bitmap64_fuse_set(b, value);
/* Keep one random entry really added */
if (rc == true && is_added == 0 && fuzzed_data.ConsumeBool()) {
value_added = value;
@@ -32,23 +32,23 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}
if(fuzzed_data.ConsumeBool())
- ndpi_bitmap64_compress(b);
+ ndpi_bitmap64_fuse_compress(b);
/* "Random" search */
num_iteration = fuzzed_data.ConsumeIntegral<u_int8_t>();
for (i = 0; i < num_iteration; i++) {
value = fuzzed_data.ConsumeIntegral<u_int64_t>();
- ndpi_bitmap64_isset(b, value);
+ ndpi_bitmap64_fuse_isset(b, value);
}
/* Search of an added entry */
if (is_added) {
- ndpi_bitmap64_isset(b, value_added);
+ ndpi_bitmap64_fuse_isset(b, value_added);
}
- ndpi_bitmap64_size(b);
+ ndpi_bitmap64_fuse_size(b);
- ndpi_bitmap64_free(b);
+ ndpi_bitmap64_fuse_free(b);
return 0;
}
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 7939abfe2..38fac3add 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1991,18 +1991,16 @@ extern "C" {
*/
ndpi_bitmap* ndpi_bitmap_alloc(void);
- ndpi_bitmap* ndpi_bitmap_alloc_size(u_int32_t size);
void ndpi_bitmap_free(ndpi_bitmap* b);
ndpi_bitmap* ndpi_bitmap_copy(ndpi_bitmap* b);
u_int64_t ndpi_bitmap_cardinality(ndpi_bitmap* b);
bool ndpi_bitmap_is_empty(ndpi_bitmap* b);
- void ndpi_bitmap_set(ndpi_bitmap* b, u_int32_t value);
- void ndpi_bitmap_unset(ndpi_bitmap* b, u_int32_t value);
- bool ndpi_bitmap_isset(ndpi_bitmap* b, u_int32_t value);
- void ndpi_bitmap_clear(ndpi_bitmap* b);
+ void ndpi_bitmap_set(ndpi_bitmap* b, u_int64_t value);
+ void ndpi_bitmap_unset(ndpi_bitmap* b, u_int64_t value);
+ bool ndpi_bitmap_isset(ndpi_bitmap* b, u_int64_t value);
size_t ndpi_bitmap_serialize(ndpi_bitmap* b, char **buf);
- ndpi_bitmap* ndpi_bitmap_deserialize(char *buf);
+ ndpi_bitmap* ndpi_bitmap_deserialize(char *buf, size_t buf_len);
void ndpi_bitmap_and(ndpi_bitmap* a, ndpi_bitmap* b_and);
ndpi_bitmap* ndpi_bitmap_and_alloc(ndpi_bitmap* a, ndpi_bitmap* b_and);
@@ -2014,7 +2012,7 @@ extern "C" {
ndpi_bitmap_iterator* ndpi_bitmap_iterator_alloc(ndpi_bitmap* b);
void ndpi_bitmap_iterator_free(ndpi_bitmap* b);
- bool ndpi_bitmap_iterator_next(ndpi_bitmap_iterator* i, u_int32_t *value);
+ bool ndpi_bitmap_iterator_next(ndpi_bitmap_iterator* i, u_int64_t *value);
/* ******************************* */
@@ -2024,16 +2022,16 @@ extern "C" {
This is
- a probabilistic datastructure !!! (i.e. be prepared to false positives)
- - immutable (i.e. adding keys after a search (i.e. ndpi_bitmap64_isset)
+ - immutable (i.e. adding keys after a search (i.e. ndpi_bitmap64_fuse_isset)
is not allowed
*/
- ndpi_bitmap64* ndpi_bitmap64_alloc(void);
- bool ndpi_bitmap64_set(ndpi_bitmap64 *b, u_int64_t value);
- bool ndpi_bitmap64_compress(ndpi_bitmap64 *b);
- bool ndpi_bitmap64_isset(ndpi_bitmap64 *b, u_int64_t value);
- void ndpi_bitmap64_free(ndpi_bitmap64 *b);
- u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b);
+ ndpi_bitmap64_fuse* ndpi_bitmap64_fuse_alloc(void);
+ bool ndpi_bitmap64_fuse_set(ndpi_bitmap64_fuse *b, u_int64_t value);
+ bool ndpi_bitmap64_fuse_compress(ndpi_bitmap64_fuse *b);
+ bool ndpi_bitmap64_fuse_isset(ndpi_bitmap64_fuse *b, u_int64_t value);
+ void ndpi_bitmap64_fuse_free(ndpi_bitmap64_fuse *b);
+ u_int32_t ndpi_bitmap64_fuse_size(ndpi_bitmap64_fuse *b);
/* ******************************* */
diff --git a/src/include/ndpi_define.h.in b/src/include/ndpi_define.h.in
index f23335974..02ffe057b 100644
--- a/src/include/ndpi_define.h.in
+++ b/src/include/ndpi_define.h.in
@@ -176,6 +176,7 @@
#define get_u_int32_t(X,O) (*(u_int32_t *)((&(((u_int8_t *)X)[O]))))
#if defined(__arm__)
#include <stdint.h>
+#include <string.h>
static inline uint64_t get_u_int64_t(const uint8_t* X, int O)
{
uint64_t tmp;
diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h
index e7eae9641..b26803a3c 100644
--- a/src/include/ndpi_private.h
+++ b/src/include/ndpi_private.h
@@ -910,6 +910,7 @@ void init_netease_games_dissector(struct ndpi_detection_module_struct *ndpi_stru
void init_pathofexile_dissector(struct ndpi_detection_module_struct *ndpi_struct, u_int32_t *id);
void init_pfcp_dissector(struct ndpi_detection_module_struct *ndpi_struct, u_int32_t *id);
void init_flute_dissector(struct ndpi_detection_module_struct *ndpi_struct, u_int32_t *id);
+void init_lolwildrift_dissector(struct ndpi_detection_module_struct *ndpi_struct, u_int32_t *id);
#endif
diff --git a/src/include/ndpi_protocol_ids.h b/src/include/ndpi_protocol_ids.h
index 95ccd9f37..a84bda3f1 100644
--- a/src/include/ndpi_protocol_ids.h
+++ b/src/include/ndpi_protocol_ids.h
@@ -435,6 +435,7 @@ typedef enum {
NDPI_PROTOCOL_GOOGLE_CALL = 404, /* Voip/audio/video calls from Google Chat/Meet/... */
NDPI_PROTOCOL_PFCP = 405,
NDPI_PROTOCOL_FLUTE = 406,
+ NDPI_PROTOCOL_LOLWILDRIFT = 407,
#ifdef CUSTOM_NDPI_PROTOCOLS
#include "../../../nDPI-custom/custom_ndpi_protocol_ids.h"
diff --git a/src/lib/ndpi_replace_printf.h b/src/include/ndpi_replace_printf.h
index 73313386c..73313386c 100644
--- a/src/lib/ndpi_replace_printf.h
+++ b/src/include/ndpi_replace_printf.h
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 8ac1516a9..f8ac2383a 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -911,6 +911,9 @@ struct ndpi_flow_udp_struct {
u_int32_t quic_0rtt_found:1;
u_int32_t quic_vn_pair:1;
+ /* NDPI_PROTOCOL_LOLWILDRIFT */
+ u_int32_t lolwildrift_stage:1;
+
/* NDPI_PROTOCOL_EPICGAMES */
u_int32_t epicgames_stage:1;
u_int32_t epicgames_word;
@@ -1141,6 +1144,7 @@ typedef struct ndpi_proto {
typedef void ndpi_bitmap;
typedef void ndpi_bitmap64;
+typedef void ndpi_bitmap64_fuse; /* probabilistic */
typedef void ndpi_bitmap_iterator;
typedef void ndpi_filter;
@@ -1155,7 +1159,7 @@ typedef struct {
typedef struct {
struct {
u_int16_t class_id;
- ndpi_bitmap64 *domains;
+ ndpi_bitmap64_fuse *domains;
} classes[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
} ndpi_domain_classify;
diff --git a/src/lib/ndpi_bitmap.c b/src/lib/ndpi_bitmap.c
index 216251b21..6caf174bf 100644
--- a/src/lib/ndpi_bitmap.c
+++ b/src/lib/ndpi_bitmap.c
@@ -1,7 +1,7 @@
/*
* ndpi_bitmap.c
*
- * Copyright (C) 2011-23 - ntop.org and contributors
+ * Copyright (C) 2011-24 - ntop.org and contributors
*
* This file is part of nDPI, an open source deep packet inspection
* library based on the OpenDPI and PACE technology by ipoque GmbH
@@ -35,145 +35,226 @@
#include "ndpi_includes.h"
#include "ndpi_encryption.h"
+#ifdef USE_ROARING_V2
+#include "third_party/include/roaring_v2.h"
+#else
#include "third_party/include/roaring.h"
+#endif
/* ******************************************* */
ndpi_bitmap* ndpi_bitmap_alloc() {
+#ifdef USE_ROARING_V2
return((ndpi_bitmap*)roaring_bitmap_create());
-}
-
-/* ******************************************* */
-
-ndpi_bitmap* ndpi_bitmap_alloc_size(u_int32_t size) {
- return((ndpi_bitmap*)roaring_bitmap_create_with_capacity(size));
+#else
+ return((ndpi_bitmap*)roaring64_bitmap_create());
+#endif
}
/* ******************************************* */
void ndpi_bitmap_free(ndpi_bitmap* b) {
+#ifdef USE_ROARING_V2
roaring_bitmap_free((const roaring_bitmap_t *)b);
+#else
+ roaring64_bitmap_free((roaring64_bitmap_t *)b);
+#endif
}
/* ******************************************* */
ndpi_bitmap* ndpi_bitmap_copy(ndpi_bitmap* b) {
+#ifdef USE_ROARING_V2
return(roaring_bitmap_copy(b));
+#else
+ return(roaring64_bitmap_copy(b));
+#endif
}
/* ******************************************* */
u_int64_t ndpi_bitmap_cardinality(ndpi_bitmap* b) {
+#ifdef USE_ROARING_V2
return(roaring_bitmap_get_cardinality((const roaring_bitmap_t *)b));
+#else
+ return(roaring64_bitmap_get_cardinality((roaring64_bitmap_t *)b));
+#endif
}
/* ******************************************* */
-void ndpi_bitmap_set(ndpi_bitmap* b, u_int32_t value) {
+void ndpi_bitmap_set(ndpi_bitmap* b, u_int64_t value) {
+#ifdef USE_ROARING_V2
roaring_bitmap_add((roaring_bitmap_t *)b, value);
+#else
+ roaring64_bitmap_add((roaring64_bitmap_t *)b, value);
+#endif
}
/* ******************************************* */
-void ndpi_bitmap_unset(ndpi_bitmap* b, u_int32_t value) {
+void ndpi_bitmap_unset(ndpi_bitmap* b, u_int64_t value) {
+#ifdef USE_ROARING_V2
roaring_bitmap_remove((roaring_bitmap_t *)b, value);
+#else
+ roaring64_bitmap_remove((roaring64_bitmap_t *)b, value);
+#endif
}
/* ******************************************* */
-bool ndpi_bitmap_isset(ndpi_bitmap* b, u_int32_t value) {
- return(roaring_bitmap_contains((const roaring_bitmap_t *)b, value));
-}
-
-/* ******************************************* */
+bool ndpi_bitmap_isset(ndpi_bitmap* b, u_int64_t value) {
+ bool ret;
+
+#ifdef USE_ROARING_V2
+ ret = roaring_bitmap_contains((const roaring_bitmap_t *)b, value);
+#else
+ ret = roaring64_bitmap_contains((const roaring64_bitmap_t *)b, value);
+#endif
-void ndpi_bitmap_clear(ndpi_bitmap* b) {
- roaring_bitmap_clear((roaring_bitmap_t *)b);
+ return(ret);
}
/* ******************************************* */
size_t ndpi_bitmap_serialize(ndpi_bitmap* b, char **buf) {
- const roaring_bitmap_t *r = (const roaring_bitmap_t *)b;
- size_t s = roaring_bitmap_size_in_bytes(r);
+ size_t s;
+#ifdef USE_ROARING_V2
+ const roaring_bitmap_t *r = (const roaring_bitmap_t *)b;
+
+ s = roaring_bitmap_portable_size_in_bytes(r);
+#else
+ const roaring64_bitmap_t *r = (const roaring64_bitmap_t *)b;
+
+ s = roaring64_bitmap_portable_size_in_bytes(r);
+#endif
+
*buf = (char*)ndpi_malloc(s);
if((*buf) == NULL) return(0);
- return(roaring_bitmap_serialize(r, *buf));
+#ifdef USE_ROARING_V2
+ return(roaring_bitmap_portable_serialize(r, *buf));
+#else
+ return(roaring64_bitmap_portable_serialize(r, *buf));
+#endif
}
/* ******************************************* */
-ndpi_bitmap* ndpi_bitmap_deserialize(char *buf) {
- return((ndpi_bitmap*)roaring_bitmap_deserialize(buf));
+ndpi_bitmap* ndpi_bitmap_deserialize(char *buf, size_t buf_len) {
+#ifdef USE_ROARING_V2
+ return((ndpi_bitmap*)roaring_bitmap_portable_deserialize_safe(buf, buf_len));
+#else
+ return((ndpi_bitmap*)roaring64_bitmap_portable_deserialize_safe(buf, buf_len));
+#endif
}
/* ******************************************* */
/* a = a & b_and (in place) */
void ndpi_bitmap_and(ndpi_bitmap* a, ndpi_bitmap* b_and) {
+#ifdef USE_ROARING_V2
roaring_bitmap_and_inplace((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_and);
+#else
+ roaring64_bitmap_and_inplace((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_and);
+#endif
}
/* ******************************************* */
/* return value = a & b_and */
ndpi_bitmap* ndpi_bitmap_and_alloc(ndpi_bitmap* a, ndpi_bitmap* b_and) {
+#ifdef USE_ROARING_V2
return((ndpi_bitmap*)roaring_bitmap_and((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_and));
+#else
+ return((ndpi_bitmap*)roaring64_bitmap_and((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_and));
+#endif
}
/* ******************************************* */
/* a = a & ~b_and (in place) */
void ndpi_bitmap_andnot(ndpi_bitmap* a, ndpi_bitmap* b_and) {
+#ifdef USE_ROARING_V2
roaring_bitmap_andnot_inplace((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_and);
+#else
+ roaring64_bitmap_andnot_inplace((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_and);
+#endif
}
/* ******************************************* */
/* a = a | b_or (in place) */
void ndpi_bitmap_or(ndpi_bitmap* a, ndpi_bitmap* b_or) {
+#ifdef USE_ROARING_V2
roaring_bitmap_or_inplace((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_or);
+#else
+ roaring64_bitmap_or_inplace((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_or);
+#endif
}
/* ******************************************* */
/* return value = a | b_or */
ndpi_bitmap* ndpi_bitmap_or_alloc(ndpi_bitmap* a, ndpi_bitmap* b_or) {
+#ifdef USE_ROARING_V2
return((ndpi_bitmap*)roaring_bitmap_or((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_or));
+#else
+ return((ndpi_bitmap*)roaring64_bitmap_or((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_or));
+#endif
}
/* ******************************************* */
/* a = a ^ b_xor (in place) */
void ndpi_bitmap_xor(ndpi_bitmap* a, ndpi_bitmap* b_xor) {
+#ifdef USE_ROARING_V2
roaring_bitmap_xor_inplace((roaring_bitmap_t*)a, (roaring_bitmap_t*)b_xor);
+#else
+ roaring64_bitmap_xor_inplace((roaring64_bitmap_t*)a, (roaring64_bitmap_t*)b_xor);
+#endif
}
/* ******************************************* */
void ndpi_bitmap_optimize(ndpi_bitmap* a) {
+#ifdef USE_ROARING_V2
roaring_bitmap_run_optimize(a);
+#else
+ roaring64_bitmap_run_optimize(a);
+#endif
}
/* ******************************************* */
ndpi_bitmap_iterator* ndpi_bitmap_iterator_alloc(ndpi_bitmap* b) {
- return(roaring_create_iterator((roaring_bitmap_t*)b));
+#ifdef USE_ROARING_V2
+ return((ndpi_bitmap_iterator*)roaring_create_iterator((roaring_bitmap_t*)b));
+#else
+ return((ndpi_bitmap_iterator*)roaring64_iterator_create((const roaring64_bitmap_t*)b));
+#endif
}
/* ******************************************* */
void ndpi_bitmap_iterator_free(ndpi_bitmap* b) {
+#ifdef USE_ROARING_V2
roaring_free_uint32_iterator((roaring_uint32_iterator_t*)b);
+#else
+ roaring64_iterator_free((roaring64_iterator_t*)b);
+#endif
}
/* ******************************************* */
bool ndpi_bitmap_is_empty(ndpi_bitmap* b) {
+#ifdef USE_ROARING_V2
return(roaring_bitmap_is_empty((roaring_bitmap_t*)b));
+#else
+ return(roaring64_bitmap_is_empty((roaring64_bitmap_t*)b));
+#endif
}
/* ******************************************* */
@@ -182,8 +263,15 @@ bool ndpi_bitmap_is_empty(ndpi_bitmap* b) {
true is returned when a value is present, false when we reached the end
*/
-bool ndpi_bitmap_iterator_next(ndpi_bitmap_iterator* i, uint32_t *value) {
- uint32_t num = roaring_read_uint32_iterator((roaring_uint32_iterator_t*)i, value, 1);
-
+bool ndpi_bitmap_iterator_next(ndpi_bitmap_iterator* i, u_int64_t *value) {
+#ifdef USE_ROARING_V2
+ uint32_t ret;
+ uint32_t num = roaring_read_uint32_iterator((roaring_uint32_iterator_t*)i, &ret, 1);
+
+ *value = (uint32_t)ret;
+#else
+ uint64_t num = roaring64_iterator_read((roaring64_iterator_t*)i, value, 1);
+#endif
+
return((num == 1) ? true /* found */ : false /* not found */);
}
diff --git a/src/lib/ndpi_bitmap64.c b/src/lib/ndpi_bitmap64_fuse.c
index f2b9ca8e7..9cf735e63 100644
--- a/src/lib/ndpi_bitmap64.c
+++ b/src/lib/ndpi_bitmap64_fuse.c
@@ -1,7 +1,7 @@
/*
- * ndpi_bitmap64.c
+ * ndpi_bitmap64_fuse.c
*
- * Copyright (C) 2011-23 - ntop.org and contributors
+ * Copyright (C) 2011-24 - ntop.org and contributors
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -30,7 +30,7 @@
#include "ndpi_api.h"
#include "third_party/include/binaryfusefilter.h"
-#define NDPI_BITMAP64_REALLOC_SIZE 4096
+#define NDPI_BITMAP64_FUSE_REALLOC_SIZE 4096
// #define PRINT_DUPLICATED_HASHS
@@ -39,16 +39,16 @@ typedef struct {
u_int64_t *entries;
bool is_compressed;
binary_fuse16_t bitmap;
-} ndpi_bitmap64_t;
+} ndpi_bitmap64_fuse_t;
/* ********************************************************** */
-ndpi_bitmap64* ndpi_bitmap64_alloc() {
- ndpi_bitmap64_t *rc = (ndpi_bitmap64_t*)ndpi_malloc(sizeof(ndpi_bitmap64_t));
+ndpi_bitmap64_fuse* ndpi_bitmap64_fuse_alloc() {
+ ndpi_bitmap64_fuse_t *rc = (ndpi_bitmap64_fuse_t*)ndpi_malloc(sizeof(ndpi_bitmap64_fuse_t));
if(!rc) return(rc);
- rc->num_allocated_entries = NDPI_BITMAP64_REALLOC_SIZE, rc->num_used_entries = 0;
+ rc->num_allocated_entries = NDPI_BITMAP64_FUSE_REALLOC_SIZE, rc->num_used_entries = 0;
if((rc->entries = (u_int64_t*)ndpi_calloc(rc->num_allocated_entries, sizeof(u_int64_t))) == NULL) {
ndpi_free(rc);
return(NULL);
@@ -56,12 +56,12 @@ ndpi_bitmap64* ndpi_bitmap64_alloc() {
rc->is_compressed = false;
- return((ndpi_bitmap64*)rc);
+ return((ndpi_bitmap64_fuse*)rc);
}
/* ********************************************************** */
-static int ndpi_bitmap64_entry_compare(const void *_a, const void *_b) {
+static int ndpi_bitmap64_fuse_entry_compare(const void *_a, const void *_b) {
u_int64_t *a = (u_int64_t*)_a, *b = (u_int64_t*)_b;
if(*a < *b) return -1;
@@ -72,8 +72,8 @@ static int ndpi_bitmap64_entry_compare(const void *_a, const void *_b) {
/* ********************************************************** */
/* Sort and compact memory before searching */
-bool ndpi_bitmap64_compress(ndpi_bitmap64 *_b) {
- ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+bool ndpi_bitmap64_fuse_compress(ndpi_bitmap64_fuse *_b) {
+ ndpi_bitmap64_fuse_t *b = (ndpi_bitmap64_fuse_t*)_b;
u_int32_t i;
if(!b)
@@ -86,7 +86,7 @@ bool ndpi_bitmap64_compress(ndpi_bitmap64 *_b) {
if(b->num_used_entries > 1)
qsort(b->entries, b->num_used_entries,
sizeof(u_int64_t),
- ndpi_bitmap64_entry_compare);
+ ndpi_bitmap64_fuse_entry_compare);
/* Now remove duplicates */
u_int64_t old_value = b->entries[0], new_len = 1;
@@ -128,8 +128,8 @@ bool ndpi_bitmap64_compress(ndpi_bitmap64 *_b) {
/* ********************************************************** */
-bool ndpi_bitmap64_set(ndpi_bitmap64 *_b, u_int64_t value) {
- ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+bool ndpi_bitmap64_fuse_set(ndpi_bitmap64_fuse *_b, u_int64_t value) {
+ ndpi_bitmap64_fuse_t *b = (ndpi_bitmap64_fuse_t*)_b;
if(!b)
return(false);
@@ -146,7 +146,7 @@ bool ndpi_bitmap64_set(ndpi_bitmap64 *_b, u_int64_t value) {
if(b->num_used_entries >= b->num_allocated_entries) {
u_int64_t *rc;
- u_int32_t new_len = b->num_allocated_entries + NDPI_BITMAP64_REALLOC_SIZE;
+ u_int32_t new_len = b->num_allocated_entries + NDPI_BITMAP64_FUSE_REALLOC_SIZE;
rc = (u_int64_t*)ndpi_realloc(b->entries,
sizeof(u_int64_t)*b->num_allocated_entries,
@@ -167,14 +167,14 @@ bool ndpi_bitmap64_set(ndpi_bitmap64 *_b, u_int64_t value) {
/* ********************************************************** */
-bool ndpi_bitmap64_isset(ndpi_bitmap64 *_b, u_int64_t value) {
- ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+bool ndpi_bitmap64_fuse_isset(ndpi_bitmap64_fuse *_b, u_int64_t value) {
+ ndpi_bitmap64_fuse_t *b = (ndpi_bitmap64_fuse_t*)_b;
if(!b)
return(false);
if(!b->is_compressed) {
- if(!ndpi_bitmap64_compress(b))
+ if(!ndpi_bitmap64_fuse_compress(b))
return(false); /* Compression failed */
}
@@ -183,8 +183,8 @@ bool ndpi_bitmap64_isset(ndpi_bitmap64 *_b, u_int64_t value) {
/* ********************************************************** */
-void ndpi_bitmap64_free(ndpi_bitmap64 *_b) {
- ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+void ndpi_bitmap64_fuse_free(ndpi_bitmap64_fuse *_b) {
+ ndpi_bitmap64_fuse_t *b = (ndpi_bitmap64_fuse_t*)_b;
if(!b)
return;
@@ -199,15 +199,15 @@ void ndpi_bitmap64_free(ndpi_bitmap64 *_b) {
/* ********************************************************** */
-u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *_b) {
- ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+u_int32_t ndpi_bitmap64_fuse_size(ndpi_bitmap64_fuse *_b) {
+ ndpi_bitmap64_fuse_t *b = (ndpi_bitmap64_fuse_t*)_b;
if(!b) return(0);
if(!b->is_compressed) {
- if(!ndpi_bitmap64_compress(b))
+ if(!ndpi_bitmap64_fuse_compress(b))
return(0); /* Compression failed */
}
- return(sizeof(ndpi_bitmap64) + binary_fuse16_size_in_bytes(&b->bitmap));
+ return(sizeof(ndpi_bitmap64_fuse) + binary_fuse16_size_in_bytes(&b->bitmap));
}
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index 812412080..fce10d072 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -57,7 +57,7 @@ void ndpi_domain_classify_free(ndpi_domain_classify *s) {
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].domains != NULL) {
- ndpi_bitmap64_free(s->classes[i].domains);
+ ndpi_bitmap64_fuse_free(s->classes[i].domains);
} else
break;
}
@@ -75,7 +75,7 @@ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) {
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].domains != NULL) {
- tot_len += ndpi_bitmap64_size(s->classes[i].domains);
+ tot_len += ndpi_bitmap64_fuse_size(s->classes[i].domains);
} else
break;
}
@@ -111,7 +111,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
- s->classes[i].domains = ndpi_bitmap64_alloc();
+ s->classes[i].domains = ndpi_bitmap64_fuse_alloc();
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
@@ -125,7 +125,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
hash = ndpi_quick_hash64(domain, strlen(domain));
- return(ndpi_bitmap64_set(s->classes[i].domains, hash));
+ return(ndpi_bitmap64_fuse_set(s->classes[i].domains, hash));
}
/* ********************************************************** */
@@ -146,7 +146,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
- s->classes[i].domains = ndpi_bitmap64_alloc();
+ s->classes[i].domains = ndpi_bitmap64_fuse_alloc();
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
break;
@@ -179,7 +179,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
hash = ndpi_quick_hash64(line, strlen(line));
- if(ndpi_bitmap64_set(s->classes[i].domains, hash))
+ if(ndpi_bitmap64_fuse_set(s->classes[i].domains, hash))
num_added++;
}
@@ -198,7 +198,7 @@ bool ndpi_domain_classify_finalize(ndpi_domain_classify *s) {
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id != 0) {
- ndpi_bitmap64_compress(s->classes[i].domains);
+ ndpi_bitmap64_fuse_compress(s->classes[i].domains);
}
}
return(true);
@@ -258,7 +258,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id != 0) {
- if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) {
+ if(ndpi_bitmap64_fuse_isset(s->classes[i].domains, hash)) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s = %d [%llu]\n",
hostname, s->classes[i].class_id, hash);
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index b9f118f6a..517df9800 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -2287,6 +2287,10 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp
"FLUTE", NDPI_PROTOCOL_CATEGORY_DOWNLOAD_FT,
ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
+ ndpi_set_proto_defaults(ndpi_str, 1 /* cleartext */, 0 /* nw proto */, NDPI_PROTOCOL_FUN, NDPI_PROTOCOL_LOLWILDRIFT,
+ "LoLWildRift", NDPI_PROTOCOL_CATEGORY_GAME,
+ ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
+ ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
#ifdef CUSTOM_NDPI_PROTOCOLS
#include "../../../nDPI-custom/custom_ndpi_main.c"
@@ -6158,6 +6162,9 @@ static int ndpi_callback_init(struct ndpi_detection_module_struct *ndpi_str) {
/* File Delivery over Unidirectional Transport */
init_flute_dissector(ndpi_str, &a);
+ /* League of Legends: Wild Rift */
+ init_lolwildrift_dissector(ndpi_str, &a);
+
#ifdef CUSTOM_NDPI_PROTOCOLS
#include "../../../nDPI-custom/custom_ndpi_main_init.c"
#endif
diff --git a/src/lib/protocols/lol_wild_rift.c b/src/lib/protocols/lol_wild_rift.c
new file mode 100644
index 000000000..eb0328112
--- /dev/null
+++ b/src/lib/protocols/lol_wild_rift.c
@@ -0,0 +1,86 @@
+/*
+ * lol_wild_rift.c
+ *
+ * League of Legends: Wild Rift
+ *
+ * Copyright (C) 2024 - ntop.org
+ * Copyright (C) 2024 - V.G <jacendi@protonmail.com>
+ *
+ * This file is part of nDPI, an open source deep packet inspection
+ * library based on the OpenDPI and PACE technology by ipoque GmbH
+ *
+ * nDPI is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * nDPI is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with nDPI. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "ndpi_protocol_ids.h"
+
+#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_LOLWILDRIFT
+
+#include "ndpi_api.h"
+#include "ndpi_private.h"
+
+static void ndpi_int_lolwildrift_add_connection(struct ndpi_detection_module_struct *ndpi_struct,
+ struct ndpi_flow_struct *flow)
+{
+ NDPI_LOG_INFO(ndpi_struct, "found League of Legends: Wild Rift\n");
+ ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_LOLWILDRIFT,
+ NDPI_PROTOCOL_UNKNOWN, NDPI_CONFIDENCE_DPI);
+}
+
+static void ndpi_search_lolwildrift(struct ndpi_detection_module_struct *ndpi_struct,
+ struct ndpi_flow_struct *flow)
+{
+ struct ndpi_packet_struct const * const packet = &ndpi_struct->packet;
+
+ NDPI_LOG_DBG(ndpi_struct, "search League of Legends: Wild Rift\n");
+
+ if (packet->payload_packet_len == 22 &&
+ ntohl(get_u_int32_t(packet->payload, 0)) == 0x102C841 &&
+ ntohl(get_u_int32_t(packet->payload, packet->payload_packet_len-4)) == 0x41304231)
+ {
+ ndpi_int_lolwildrift_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ if (packet->payload_packet_len == 69 &&
+ ntohl(get_u_int32_t(packet->payload, 0)) == 0x4000000)
+ {
+ flow->l4.udp.lolwildrift_stage = 1;
+ return;
+ }
+
+ if (flow->l4.udp.lolwildrift_stage == 1 &&
+ packet->payload_packet_len == 359 &&
+ ntohl(get_u_int32_t(packet->payload, 0)) == 0x10000000)
+ {
+ ndpi_int_lolwildrift_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ NDPI_EXCLUDE_PROTO(ndpi_struct, flow);
+}
+
+void init_lolwildrift_dissector(struct ndpi_detection_module_struct *ndpi_struct,
+ u_int32_t *id)
+{
+ ndpi_set_bitmask_protocol_detection("LoLWildRift", ndpi_struct, *id,
+ NDPI_PROTOCOL_LOLWILDRIFT,
+ ndpi_search_lolwildrift,
+ NDPI_SELECTION_BITMASK_PROTOCOL_V4_V6_UDP_WITH_PAYLOAD,
+ SAVE_DETECTION_BITMASK_AS_UNKNOWN,
+ ADD_TO_DETECTION_BITMASK);
+
+ *id += 1;
+}
diff --git a/src/lib/protocols/tencent_games.c b/src/lib/protocols/tencent_games.c
index df40f5f8d..12bde1b37 100644
--- a/src/lib/protocols/tencent_games.c
+++ b/src/lib/protocols/tencent_games.c
@@ -29,6 +29,14 @@
#include "ndpi_api.h"
#include "ndpi_private.h"
+static void ndpi_int_tencent_games_add_connection(struct ndpi_detection_module_struct *ndpi_struct,
+ struct ndpi_flow_struct *flow)
+{
+ NDPI_LOG_INFO(ndpi_struct, "found Tencent Games\n");
+ ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_TENCENTGAMES,
+ NDPI_PROTOCOL_UNKNOWN, NDPI_CONFIDENCE_DPI);
+}
+
static void ndpi_search_tencent_games(struct ndpi_detection_module_struct *ndpi_struct,
struct ndpi_flow_struct *flow)
{
@@ -40,9 +48,36 @@ static void ndpi_search_tencent_games(struct ndpi_detection_module_struct *ndpi_
if (ntohl(get_u_int32_t(packet->payload, 0)) == 0x3366000B &&
ntohs(get_u_int16_t(packet->payload, 4)) == 0xB)
{
- NDPI_LOG_INFO(ndpi_struct, "found Tencent Games\n");
- ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_TENCENTGAMES,
- NDPI_PROTOCOL_UNKNOWN, NDPI_CONFIDENCE_DPI);
+ ndpi_int_tencent_games_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ if (ntohl(get_u_int32_t(packet->payload, 0)) == 0x4366AA00 &&
+ ntohl(get_u_int32_t(packet->payload, 12)) == 0x10E68601)
+ {
+ ndpi_int_tencent_games_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ if (ntohl(get_u_int32_t(packet->payload, 0)) == 0xAA000000 &&
+ ntohl(get_u_int32_t(packet->payload, 10)) == 0x10E68601)
+ {
+ ndpi_int_tencent_games_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ if (get_u_int16_t(packet->payload, 0) == 0 &&
+ ntohs(get_u_int16_t(packet->payload, 2)) == (u_int16_t)(packet->payload_packet_len-4) &&
+ ntohs(get_u_int16_t(packet->payload, 4)) == 0x7801)
+ {
+ ndpi_int_tencent_games_add_connection(ndpi_struct, flow);
+ return;
+ }
+
+ if (ntohl(get_u_int32_t(packet->payload, 0)) == 0x4215F787 &&
+ get_u_int16_t(packet->payload, 6) == 0)
+ {
+ ndpi_int_tencent_games_add_connection(ndpi_struct, flow);
return;
}
}
diff --git a/src/lib/third_party/include/roaring.h b/src/lib/third_party/include/roaring.h
index 500ba9cb9..4e356ef9a 100644
--- a/src/lib/third_party/include/roaring.h
+++ b/src/lib/third_party/include/roaring.h
@@ -1,5 +1,8 @@
+#ifdef USE_ROARING_V2
+#include "roaring_v2.h"
+#else
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
-// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+// Created by amalgamation.sh on 2024-03-20T03:56:45Z
/*
* The CRoaring project is under a dual license (Apache/MIT).
@@ -58,11 +61,11 @@
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
#ifndef ROARING_INCLUDE_ROARING_VERSION
#define ROARING_INCLUDE_ROARING_VERSION
-#define ROARING_VERSION "0.9.6"
+#define ROARING_VERSION "3.0.0"
enum {
- ROARING_VERSION_MAJOR = 0,
- ROARING_VERSION_MINOR = 9,
- ROARING_VERSION_REVISION = 6
+ ROARING_VERSION_MAJOR = 3,
+ ROARING_VERSION_MINOR = 0,
+ ROARING_VERSION_REVISION = 0
};
#endif // ROARING_INCLUDE_ROARING_VERSION
/* end file include/roaring/roaring_version.h */
@@ -78,10 +81,11 @@ enum {
#include <stdint.h>
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
#endif
-
/**
* When building .c files as C++, there's added compile-time checking if the
* container types are derived from a `container_t` base class. So long as
@@ -95,12 +99,12 @@ extern "C" { namespace roaring { namespace api {
* code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
*/
#if defined(__cplusplus)
- extern "C++" {
- struct container_s {};
- }
- #define ROARING_CONTAINER_T ::roaring::api::container_s
+extern "C++" {
+struct container_s {};
+}
+#define ROARING_CONTAINER_T ::roaring::api::container_s
#else
- #define ROARING_CONTAINER_T void // no compile-time checking
+#define ROARING_CONTAINER_T void // no compile-time checking
#endif
#define ROARING_FLAG_COW UINT8_C(0x1)
@@ -125,15 +129,14 @@ typedef struct roaring_array_s {
uint8_t flags;
} roaring_array_t;
-
typedef bool (*roaring_iterator)(uint32_t value, void *param);
typedef bool (*roaring_iterator64)(uint64_t value, void *param);
/**
-* (For advanced users.)
-* The roaring_statistics_t can be used to collect detailed statistics about
-* the composition of a roaring bitmap.
-*/
+ * (For advanced users.)
+ * The roaring_statistics_t can be used to collect detailed statistics about
+ * the composition of a roaring bitmap.
+ */
typedef struct roaring_statistics_s {
uint32_t n_containers; /* number of containers */
@@ -166,12 +169,912 @@ typedef struct roaring_statistics_s {
// and n_values_arrays, n_values_rle, n_values_bitmap
} roaring_statistics_t;
+/**
+ * Roaring-internal type used to iterate within a roaring container.
+ */
+typedef struct roaring_container_iterator_s {
+ // For bitset and array containers this is the index of the bit / entry.
+ // For run containers this points at the run.
+ int32_t index;
+} roaring_container_iterator_t;
+
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace api {
+}
+}
+} // extern "C" { namespace roaring { namespace api {
#endif
#endif /* ROARING_TYPES_H */
/* end file include/roaring/roaring_types.h */
+/* begin file include/roaring/portability.h */
+/*
+ * portability.h
+ *
+ */
+
+/**
+ * All macros should be prefixed with either CROARING or ROARING.
+ * The library uses both ROARING_...
+ * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for
+ * macros that are provided by the build system or that are closely
+ * related to the format. The header macros may also use ROARING_.
+ * The CROARING_ prefix is for internal macros that a user is unlikely
+ * to ever interact with.
+ */
+
+#ifndef INCLUDE_PORTABILITY_H_
+#define INCLUDE_PORTABILITY_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif // _GNU_SOURCE
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS 1
+#endif // __STDC_FORMAT_MACROS
+
+#ifdef _MSC_VER
+#define CROARING_VISUAL_STUDIO 1
+/**
+ * We want to differentiate carefully between
+ * clang under visual studio and regular visual
+ * studio.
+ */
+#ifdef __clang__
+// clang under visual studio
+#define CROARING_CLANG_VISUAL_STUDIO 1
+#else
+// just regular visual studio (best guess)
+#define CROARING_REGULAR_VISUAL_STUDIO 1
+#endif // __clang__
+#endif // _MSC_VER
+#ifndef CROARING_VISUAL_STUDIO
+#define CROARING_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_CLANG_VISUAL_STUDIO
+#define CROARING_CLANG_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_REGULAR_VISUAL_STUDIO
+#define CROARING_REGULAR_VISUAL_STUDIO 0
+#endif
+
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#undef _POSIX_C_SOURCE
+#endif
+
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
+#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+#define _XOPEN_SOURCE 700
+#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+
+#ifdef __illumos__
+#define __EXTENSIONS__
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above
+#ifdef __GLIBC__
+#include <malloc.h> // this should never be needed but there are some reports that it is needed.
+#endif
+
+#ifdef __cplusplus
+extern "C" { // portability definitions are in global scope, not a namespace
+#endif
+
+#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
+#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#ifndef __restrict__
+#define __restrict__ __restrict
+#endif // __restrict__
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+#if defined(__x86_64__) || defined(_M_X64)
+// we have an x64 processor
+#define CROARING_IS_X64 1
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910)
+// Old visual studio systems won't support AVX2 well.
+#undef CROARING_IS_X64
+#endif
+
+#if defined(__clang_major__) && (__clang_major__ <= 8) && !defined(__AVX2__)
+// Older versions of clang have a bug affecting us
+// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
+#undef CROARING_IS_X64
+#endif
+
+#ifdef ROARING_DISABLE_X64
+#undef CROARING_IS_X64
+#endif
+// we include the intrinsic header
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler */
+#include <x86intrin.h> // on some recent GCC, this will declare posix_memalign
+
+#if CROARING_CLANG_VISUAL_STUDIO
+
+/**
+ * You are not supposed, normally, to include these
+ * headers directly. Instead you should either include intrin.h
+ * or x86intrin.h. However, when compiling with clang
+ * under Windows (i.e., when _MSC_VER is set), these headers
+ * only get included *if* the corresponding features are detected
+ * from macros:
+ * e.g., if __AVX2__ is set... in turn, we normally set these
+ * macros by compiling against the corresponding architecture
+ * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
+ * software with these advanced instructions. These headers would
+ * normally guard against such usage, but we carefully included
+ * <x86intrin.h> (or <intrin.h>) before, so the headers
+ * are fooled.
+ */
+// To avoid reordering imports:
+// clang-format off
+#include <bmiintrin.h> // for _blsr_u64
+#include <lzcntintrin.h> // for __lzcnt64
+#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <wmmintrin.h>
+#if _MSC_VER >= 1920
+// Important: we need the AVX-512 headers:
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512cdintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vlintrin.h>
+#include <avx512vbmiintrin.h>
+#include <avx512vbmi2intrin.h>
+#include <avx512vpopcntdqintrin.h>
+// clang-format on
+#endif // _MSC_VER >= 1920
+// unfortunately, we may not get _blsr_u64, but, thankfully, clang
+// has it as a macro.
+#ifndef _blsr_u64
+// we roll our own
+#define _blsr_u64(n) ((n - 1) & n)
+#endif // _blsr_u64
+#endif // SIMDJSON_CLANG_VISUAL_STUDIO
+
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+#endif // defined(__x86_64__) || defined(_M_X64)
+
+#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
+#define CROARING_USENEON
+#endif
+#if defined(CROARING_USENEON)
+#include <arm_neon.h>
+#endif
+
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
+ * assembly */
+#define CROARING_INLINE_ASM 1
+#endif // _MSC_VER
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+/* Microsoft C/C++-compatible compiler */
+#include <intrin.h>
+
+#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
+ // intrinsics are defined!!!
+#define CROARING_INTRINSICS 1
+// sadly there is no way to check whether we are missing these intrinsics
+// specifically.
+
+/* wrappers for Visual Studio built-ins that look like gcc built-ins
+ * __builtin_ctzll */
+/** result might be undefined when input_num is zero */
+static inline int roaring_trailing_zeroes(unsigned long long input_num) {
+ unsigned long index;
+#ifdef _WIN64 // highly recommended!!!
+ _BitScanForward64(&index, input_num);
+#else // if we must support 32-bit Windows
+ if ((uint32_t)input_num != 0) {
+ _BitScanForward(&index, (uint32_t)input_num);
+ } else {
+ _BitScanForward(&index, (uint32_t)(input_num >> 32));
+ index += 32;
+ }
+#endif // _WIN64
+ return index;
+}
+
+/* Visual Studio wrapper that looks like the gcc built-in __builtin_clzll:
+ * returns the number of leading zero bits of input_num. */
+/** result might be undefined when input_num is zero */
+static inline int roaring_leading_zeroes(unsigned long long input_num) {
+    unsigned long top_bit;
+#ifdef _WIN64 // highly recommended!!!
+    _BitScanReverse64(&top_bit, input_num);
+#else // if we must support 32-bit Windows
+    const uint32_t high_half = (uint32_t)(input_num >> 32);
+    if (high_half != 0) {
+        // the highest set bit lives in the upper 32 bits
+        _BitScanReverse(&top_bit, high_half);
+        top_bit += 32;
+    } else {
+        _BitScanReverse(&top_bit, (uint32_t)(input_num));
+    }
+#endif // _WIN64
+    // convert "index of highest set bit" into "count of zeros above it"
+    return 63 - top_bit;
+}
+
+/* Use #define so this is effective even under /Ob0 (no inline) */
+#define roaring_unreachable __assume(0)
+#endif // __clang__
+
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+#ifndef CROARING_INTRINSICS
+#define CROARING_INTRINSICS 1
+#define roaring_unreachable __builtin_unreachable()
+/** Number of trailing zero bits of input_num (via __builtin_ctzll);
+ * result might be undefined when input_num is zero */
+static inline int roaring_trailing_zeroes(unsigned long long input_num) {
+    const int zero_bits = __builtin_ctzll(input_num);
+    return zero_bits;
+}
+/** Number of leading zero bits of input_num (via __builtin_clzll);
+ * result might be undefined when input_num is zero */
+static inline int roaring_leading_zeroes(unsigned long long input_num) {
+    const int zero_bits = __builtin_clzll(input_num);
+    return zero_bits;
+}
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#define ALIGNED(x) __declspec(align(x))
+#elif defined(__GNUC__) || defined(__clang__)
+#define ALIGNED(x) __attribute__((aligned(x)))
+#else
+#warning "Warning. Unrecognized compiler."
+#define ALIGNED(x)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define CROARING_WARN_UNUSED __attribute__((warn_unused_result))
+#else
+#define CROARING_WARN_UNUSED
+#endif
+
+#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
+
+#ifdef CROARING_USENEON
+// we can always compute the popcount fast.
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && \
+ ((defined(_WIN64) || defined(_WIN32)) && \
+ defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+ CROARING_REGULAR_VISUAL_STUDIO)
+// Software population count, used by roaring_hamming on the MSVC/ARM paths
+// where no popcount intrinsic is used.
+static inline int roaring_hamming_backup(uint64_t x) {
+    const uint64_t mask_pairs = UINT64_C(0x5555555555555555);
+    const uint64_t mask_quads = UINT64_C(0x3333333333333333);
+    const uint64_t mask_bytes = UINT64_C(0x0F0F0F0F0F0F0F0F);
+    // per-2-bit counts
+    x = x - ((x >> 1) & mask_pairs);
+    // per-4-bit counts
+    x = (x & mask_quads) + ((x >> 2) & mask_quads);
+    // per-byte counts
+    x = (x + (x >> 4)) & mask_bytes;
+    // the multiplication accumulates every byte count into the top byte
+    x = x * UINT64_C(0x0101010101010101);
+    return x >> 56;
+}
+#endif
+
+/* Population count: returns the number of bits set in x.
+ * The implementation is picked at compile time: NEON or a software fallback
+ * on Visual Studio/ARM, __popcnt/__popcnt64 on Visual Studio/x86, and
+ * __builtin_popcountll everywhere else. */
+static inline int roaring_hamming(uint64_t x) {
+#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+    CROARING_REGULAR_VISUAL_STUDIO
+#ifdef CROARING_USENEON
+    // the parameter is named x (the previous `input_num` did not exist here)
+    return vaddv_u8(vcnt_u8(vcreate_u8(x)));
+#elif defined(_M_ARM64)
+    return roaring_hamming_backup(x);
+    // (int) _CountOneBits64(x); is unavailable
+#else // _M_ARM64
+    return (int)__popcnt64(x);
+#endif // _M_ARM64
+#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
+    CROARING_REGULAR_VISUAL_STUDIO
+#ifdef _M_ARM
+    return roaring_hamming_backup(x);
+    // _CountOneBits is unavailable
+#else // _M_ARM
+    // 32-bit build: count each 32-bit half separately
+    return (int)__popcnt((unsigned int)x) +
+           (int)__popcnt((unsigned int)(x >> 32));
+#endif // _M_ARM
+#else
+    return __builtin_popcountll(x);
+#endif
+}
+
+#ifndef UINT64_C
+#define UINT64_C(c) (c##ULL)
+#endif // UINT64_C
+
+#ifndef UINT32_C
+#define UINT32_C(c) (c##UL)
+#endif // UINT32_C
+
+#ifdef __cplusplus
+} // extern "C" {
+#endif // __cplusplus
+
+// this is almost standard?
+#undef STRINGIFY_IMPLEMENTATION_
+#undef STRINGIFY
+#define STRINGIFY_IMPLEMENTATION_(a) #a
+#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)
+
+// Our fast kernels require 64-bit systems.
+//
+// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
+// Furthermore, the number of SIMD registers is reduced.
+//
+// On 32-bit ARM, we would have smaller registers.
+//
+// The library should still have the fallback kernel. It is
+// slower, but it should run everywhere.
+
+//
+// Enable valid runtime implementations, and select
+// CROARING_BUILTIN_IMPLEMENTATION
+//
+
+// We are going to use runtime dispatch.
+#if CROARING_IS_X64
+#ifdef __clang__
+// clang does not have GCC push pop
+// warning: clang attribute push can't be used within a namespace in clang up
+// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be
+// *outside* of a namespace.
+#define CROARING_TARGET_REGION(T) \
+ _Pragma(STRINGIFY(clang attribute push(__attribute__((target(T))), \
+ apply_to = function)))
+#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop")
+#elif defined(__GNUC__)
+// GCC is easier
+#define CROARING_TARGET_REGION(T) \
+ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
+#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options")
+#endif // clang then gcc
+
+#endif // CROARING_IS_X64
+
+// Default target region macros don't do anything.
+#ifndef CROARING_TARGET_REGION
+#define CROARING_TARGET_REGION(T)
+#define CROARING_UNTARGET_REGION
+#endif
+
+#define CROARING_TARGET_AVX2 \
+ CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt")
+#define CROARING_TARGET_AVX512 \
+ CROARING_TARGET_REGION( \
+ "avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw," \
+ "avx512vbmi2,avx512bitalg,avx512vpopcntdq")
+#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION
+#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION
+
+#ifdef __AVX2__
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX2
+#define CROARING_TARGET_AVX2
+#undef CROARING_UNTARGET_AVX2
+#define CROARING_UNTARGET_AVX2
+#endif
+
+#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && \
+ defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && \
+ defined(__AVX512VPOPCNTDQ__)
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX512
+#define CROARING_TARGET_AVX512
+#undef CROARING_UNTARGET_AVX512
+#define CROARING_UNTARGET_AVX512
+#endif
+
+// Allow unaligned memory access
+#if defined(__GNUC__) || defined(__clang__)
+#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
+#else
+#define ALLOW_UNALIGNED
+#endif
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+#define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(_WIN32)
+#define CROARING_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || \
+ defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined
+ // __ORDER_BIG_ENDIAN__
+#include <machine/endian.h>
+#elif defined(sun) || \
+ defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/byteorder.h>
+#else // defined(__APPLE__) || defined(__FreeBSD__)
+
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
+
+#endif // defined(__APPLE__) || defined(__FreeBSD__)
+
+// `#ifndef` only accepts a single identifier, so the compound test must be
+// written with `#if`. When the byte-order macros are missing we assume
+// little endian; otherwise we compare them. A single chain guarantees that
+// CROARING_IS_BIG_ENDIAN is defined exactly once.
+#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+#define CROARING_IS_BIG_ENDIAN 0
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 0
+#else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 1
+#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#endif
+
+// Host <-> big endian conversion.
+#if CROARING_IS_BIG_ENDIAN
+#define croaring_htobe64(x) (x)
+
+#elif defined(_WIN32) || defined(_WIN64) // CROARING_IS_BIG_ENDIAN
+#include <stdlib.h>
+#define croaring_htobe64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__) // CROARING_IS_BIG_ENDIAN
+#include <libkern/OSByteOrder.h>
+#define croaring_htobe64(x) OSSwapInt64(x)
+
+#elif defined(__has_include) && \
+ __has_include(<byteswap.h>) // CROARING_IS_BIG_ENDIAN
+#include <byteswap.h>
+#define croaring_htobe64(x) __bswap_64(x)
+
+#else // CROARING_IS_BIG_ENDIAN
+// Generic 64-bit byte swap. Gets compiled to bswap or equivalent on most
+// compilers. The argument is parenthesized so that compound expressions
+// (e.g. `a | b`) are swapped as a whole. Note: x is evaluated several times.
+#define croaring_htobe64(x)                      \
+    ((((x) & 0x00000000000000FFULL) << 56) |     \
+     (((x) & 0x000000000000FF00ULL) << 40) |     \
+     (((x) & 0x0000000000FF0000ULL) << 24) |     \
+     (((x) & 0x00000000FF000000ULL) << 8) |      \
+     (((x) & 0x000000FF00000000ULL) >> 8) |      \
+     (((x) & 0x0000FF0000000000ULL) >> 24) |     \
+     (((x) & 0x00FF000000000000ULL) >> 40) |     \
+     (((x) & 0xFF00000000000000ULL) >> 56))
+#endif // CROARING_IS_BIG_ENDIAN
+#define croaring_be64toh(x) croaring_htobe64(x)
+// End of host <-> big endian conversion.
+
+// Defines for the possible CROARING atomic implementations
+#define CROARING_ATOMIC_IMPL_NONE 1
+#define CROARING_ATOMIC_IMPL_CPP 2
+#define CROARING_ATOMIC_IMPL_C 3
+#define CROARING_ATOMIC_IMPL_C_WINDOWS 4
+
+// If the user has forced a specific implementation, use that; otherwise,
+// figure out the best implementation we can use.
+#if !defined(CROARING_ATOMIC_IMPL)
+#if defined(__cplusplus) && __cplusplus >= 201103L
+#ifdef __has_include
+#if __has_include(<atomic>)
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+#endif //__has_include(<atomic>)
+#else
+ // We lack __has_include to check:
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+#endif //__has_include
+#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C
+#elif CROARING_REGULAR_VISUAL_STUDIO
+ // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
+#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS
+#endif
+#endif // !defined(CROARING_ATOMIC_IMPL)
+
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+#include <stdatomic.h>
+typedef _Atomic(uint32_t) croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    // memory_order_relaxed is always enough when taking a new reference:
+    // a new reference can only be formed from an existing one, and handing
+    // an existing reference to another thread must already provide any
+    // required synchronization.
+    (void)atomic_fetch_add_explicit(val, 1, memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // Every access to the object made through a reference must happen
+    // before the object is deleted, possibly by a different thread. Hence
+    // the decrement is a "release" operation (accesses through this
+    // reference precede it), and the thread that observes the count reaching
+    // zero issues an "acquire" fence before it goes on to delete the object.
+    const uint32_t before =
+        atomic_fetch_sub_explicit(val, 1, memory_order_release);
+    if (before != 1) {
+        return false;  // other references remain
+    }
+    atomic_thread_fence(memory_order_acquire);
+    return true;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    // relaxed load of the current counter value
+    const uint32_t current = atomic_load_explicit(val, memory_order_relaxed);
+    return current;
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
+#include <atomic>
+typedef std::atomic<uint32_t> croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    // relaxed increment; the previous value is not needed
+    (void)val->fetch_add(1, std::memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // Release on the decrement, acquire fence once the count hits zero
+    // (same ordering as the c11 atomic implementation).
+    const uint32_t before = val->fetch_sub(1, std::memory_order_release);
+    if (before != 1) {
+        return false;  // other references remain
+    }
+    std::atomic_thread_fence(std::memory_order_acquire);
+    return true;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    // relaxed load of the current counter value
+    const uint32_t current = val->load(std::memory_order_relaxed);
+    return current;
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
+#include <intrin.h>
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+
+// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and
+// overflow is defined to wrap, so we can pretend it is a uint32_t for our case
+typedef volatile long croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    // interlocked increment; the returned new value is not needed
+    (void)_InterlockedIncrement(val);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // _InterlockedDecrement yields the decremented value
+    const long remaining = _InterlockedDecrement(val);
+    return remaining == 0;
+}
+
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    // A plain read is used here. Per
+    // https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
+    // > Simple reads and writes to properly-aligned 32-bit variables are atomic
+    // > operations. In other words, you will not end up with only one portion
+    // > of the variable updated; all bits are updated in an atomic fashion.
+    const long snapshot = *val;
+    return snapshot;
+}
+//#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
+#else
+#include <assert.h>
+// Non-atomic fallback: a plain counter with no synchronization of its own.
+typedef uint32_t croaring_refcount_t;
+
+/* Take one more reference. */
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    *val += 1;
+}
+
+/* Drop one reference. Returns true when the counter reached zero.
+ * The counter must be strictly positive on entry (checked with assert). */
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    assert(*val > 0);
+    *val -= 1;
+    // compare the counter itself, not the pointer: `val == 0` was always
+    // false for a valid pointer, so the last release was never reported
+    return *val == 0;
+}
+
+/* Read the current counter value. */
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    return *val;
+}
+//#else
+//#error "Unknown atomic implementation"
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define CROARING_DEPRECATED __attribute__((deprecated))
+#else
+#define CROARING_DEPRECATED
+#endif // defined(__GNUC__) || defined(__clang__)
+
+// We need portability.h to be included first,
+// but we also always want isadetection.h to be
+// included (right after).
+// See https://github.com/RoaringBitmap/CRoaring/issues/394
+// There is no scenario where we want portability.h to
+// be included, but not isadetection.h: the latter is a
+// strict requirement.
+#endif /* INCLUDE_PORTABILITY_H_ */
+/* end file include/roaring/portability.h */
+/* begin file include/roaring/bitset/bitset.h */
+#ifndef CBITSET_BITSET_H
+#define CBITSET_BITSET_H
+
+// For compatibility with MSVC with the use of `restrict`
+#if (__STDC_VERSION__ >= 199901L) || \
+ (defined(__GNUC__) && defined(__STDC_VERSION__))
+#define CBITSET_RESTRICT restrict
+#else
+#define CBITSET_RESTRICT
+#endif // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) &&
+ // defined(__STDC_VERSION__ ))
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace api {
+#endif
+
+/* A bitset stored as an array of 64-bit words. */
+struct bitset_s {
+ /* Word buffer; bit i lives in array[i / 64] at bit position i % 64. */
+ uint64_t *CBITSET_RESTRICT array;
+ /* For simplicity and performance, we prefer to have a size and a capacity
+ * that is a multiple of 64 bits. Thus we only track the size and the
+ * capacity in terms of 64-bit words allocated */
+ /* Number of accessible 64-bit words (see bitset_size_in_words). */
+ size_t arraysize;
+ /* Number of 64-bit words allocated -- presumably >= arraysize; confirm
+ * against the implementation of bitset_resize/bitset_grow. */
+ size_t capacity;
+};
+
+typedef struct bitset_s bitset_t;
+
+/* Create a new bitset. Return NULL in case of failure. */
+bitset_t *bitset_create(void);
+
+/* Create a new bitset able to contain size bits. Return NULL in case of
+ * failure. */
+bitset_t *bitset_create_with_capacity(size_t size);
+
+/* Free memory. */
+void bitset_free(bitset_t *bitset);
+
+/* Set all bits to zero. */
+void bitset_clear(bitset_t *bitset);
+
+/* Set all bits to one. */
+void bitset_fill(bitset_t *bitset);
+
+/* Create a copy */
+bitset_t *bitset_copy(const bitset_t *bitset);
+
+/* For advanced users: Resize the bitset so that it can support newarraysize *
+ * 64 bits. Return true in case of success, false for failure. Pad with zeroes
+ * new buffer areas if requested. */
+bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes);
+
+/* Size, in bytes, of the accessible part of the backend buffer
+ * (arraysize words of sizeof(uint64_t) bytes each). */
+static inline size_t bitset_size_in_bytes(const bitset_t *bitset) {
+    const size_t words = bitset->arraysize;
+    return words * sizeof(uint64_t);
+}
+
+/* Number of bits that can be accessed: 64 per word in use. */
+static inline size_t bitset_size_in_bits(const bitset_t *bitset) {
+    const size_t words = bitset->arraysize;
+    return words * 64;
+}
+
+/* Number of 64-bit words of the backend buffer that are in use. */
+static inline size_t bitset_size_in_words(const bitset_t *bitset) {
+    const size_t words = bitset->arraysize;
+    return words;
+}
+
+/* For advanced users: Grow the bitset so that it can support newarraysize * 64
+ * bits with padding. Return true in case of success, false for failure. */
+bool bitset_grow(bitset_t *bitset, size_t newarraysize);
+
+/* attempts to recover unused memory, return false in case of
+ * roaring_reallocation failure */
+bool bitset_trim(bitset_t *bitset);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1+s, 2+s, 10+s */
+void bitset_shift_left(bitset_t *bitset, size_t s);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */
+void bitset_shift_right(bitset_t *bitset, size_t s);
+
+/* Turn on bit i. If i lies beyond the current size the bitset is grown
+ * first; when growing fails the call silently does nothing. */
+static inline void bitset_set(bitset_t *bitset, size_t i) {
+    const size_t word_idx = i / 64;
+    if (word_idx >= bitset->arraysize && !bitset_grow(bitset, word_idx + 1)) {
+        return;  // could not grow: give up silently
+    }
+    const uint64_t bit = ((uint64_t)1) << (i % 64);
+    bitset->array[word_idx] |= bit;
+}
+
+/* Force bit i to `flag`. If i lies beyond the current size the bitset is
+ * grown first; when growing fails the call silently does nothing. */
+static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
+    const size_t word_idx = i / 64;
+    const size_t bit_idx = i % 64;
+    if (word_idx >= bitset->arraysize && !bitset_grow(bitset, word_idx + 1)) {
+        return;  // could not grow: give up silently
+    }
+    const uint64_t clear_mask = ~(((uint64_t)1) << bit_idx);
+    const uint64_t value_bit = ((uint64_t)flag) << bit_idx;
+    // clear the target bit, then OR in the requested value
+    bitset->array[word_idx] = (bitset->array[word_idx] & clear_mask) | value_bit;
+}
+
+/* Read bit i. Bits beyond the current size read as false. */
+static inline bool bitset_get(const bitset_t *bitset, size_t i) {
+    const size_t word_idx = i / 64;
+    if (word_idx < bitset->arraysize) {
+        const uint64_t word = bitset->array[word_idx];
+        return ((word >> (i % 64)) & 1) != 0;
+    }
+    return false;
+}
+
+/* Count number of bits set. */
+size_t bitset_count(const bitset_t *bitset);
+
+/* Find the index of the first bit set. Or zero if the bitset is empty. */
+size_t bitset_minimum(const bitset_t *bitset);
+
+/* Find the index of the last bit set. Or zero if the bitset is empty. */
+size_t bitset_maximum(const bitset_t *bitset);
+
+/* compute the union in-place (to b1), returns true if successful, to generate a
+ * new bitset first call bitset_copy */
+bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the union (without materializing it) */
+size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the intersection in-place (to b1), to generate a new bitset first
+ * call bitset_copy */
+void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the intersection (without materializing it) */
+size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain no common elements */
+bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain any common elements */
+bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if b1 contains all of the set bits of b2 */
+bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the difference in-place (to b1), to generate a new bitset first call
+ * bitset_copy */
+void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the difference */
+size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the symmetric difference in-place (to b1), return true if successful,
+ * to generate a new bitset first call bitset_copy */
+bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the symmetric difference */
+size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+ const bitset_t *CBITSET_RESTRICT b2);
+
+/* Find the first set bit at position *i or later. On success *i is updated
+ * to that position and true is returned; otherwise false is returned and *i
+ * is left untouched. Typical use:
+ for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) {
+ //.....
+ }
+ */
+static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
+    size_t word_idx = *i / 64;
+    if (word_idx >= bitset->arraysize) {
+        return false;
+    }
+    // remainder of the word that contains position *i
+    const uint64_t tail = bitset->array[word_idx] >> (*i & 63);
+    if (tail != 0) {
+        *i += roaring_trailing_zeroes(tail);
+        return true;
+    }
+    // scan the following words for the first non-empty one
+    for (word_idx++; word_idx < bitset->arraysize; word_idx++) {
+        const uint64_t word = bitset->array[word_idx];
+        if (word != 0) {
+            *i = word_idx * 64 + roaring_trailing_zeroes(word);
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Batch iteration over the set bits: writes up to `capacity` positions of
+ * set bits, starting the search at *startfrom (inclusive), into `buffer`.
+ * Returns how many positions were written (0 when there is nothing left or
+ * capacity is 0). When at least one position is written, *startfrom is set
+ * to the last position written. Typical use:
+ size_t buffer[256];
+ size_t howmany = 0;
+ for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256,
+ &startfrom)) > 0 ; startfrom++) {
+ //.....
+ }
+ */
+static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                          size_t capacity, size_t *startfrom) {
+    if (capacity == 0) return 0; // sanity check
+    size_t x = *startfrom / 64;
+    if (x >= bitset->arraysize) {
+        return 0; // nothing more to iterate over
+    }
+    uint64_t w = bitset->array[x];
+    // Discard the bits below *startfrom by masking them out. Shifting the
+    // word right instead would make every position reported from this first
+    // word too small by (*startfrom & 63), since positions are computed as
+    // trailing-zero count + base.
+    w &= ~((((uint64_t)1) << (*startfrom & 63)) - 1);
+    size_t howmany = 0;
+    size_t base = x << 6;
+    while (howmany < capacity) {
+        while (w != 0) {
+            uint64_t t = w & (~w + 1); // isolate the lowest set bit
+            int r = roaring_trailing_zeroes(w);
+            buffer[howmany++] = r + base;
+            if (howmany == capacity) goto end;
+            w ^= t; // clear it and move on
+        }
+        x += 1;
+        if (x == bitset->arraysize) {
+            break;
+        }
+        base += 64;
+        w = bitset->array[x];
+    }
+end:
+    if (howmany > 0) {
+        *startfrom = buffer[howmany - 1];
+    }
+    return howmany;
+}
+
+typedef bool (*bitset_iterator)(size_t value, void *param);
+
+// Calls `iterator(position, ptr)` for every set bit, in increasing order.
+// Stops and returns false as soon as the callback returns false; returns
+// true if the traversal was not interrupted.
+static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                                   void *ptr) {
+    size_t word_start = 0;
+    for (size_t word_idx = 0; word_idx < b->arraysize; ++word_idx) {
+        uint64_t remaining = b->array[word_idx];
+        while (remaining != 0) {
+            const int offset = roaring_trailing_zeroes(remaining);
+            if (!iterator(offset + word_start, ptr)) {
+                return false;
+            }
+            // drop the lowest set bit
+            remaining &= remaining - 1;
+        }
+        word_start += 64;
+    }
+    return true;
+}
+
+/* Print the positions of the set bits to stdout as "{p0, p1, ...}". */
+static inline void bitset_print(const bitset_t *b) {
+    size_t pos = 0;
+    printf("{");
+    while (bitset_next_set_bit(b, &pos)) {
+        printf("%zu, ", pos);
+        pos++;  // resume the search just past the bit we printed
+    }
+    printf("}");
+}
+
+#ifdef __cplusplus
+}
+}
+} // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif
+/* end file include/roaring/bitset/bitset.h */
/* begin file include/roaring/roaring.h */
/*
* An implementation of Roaring Bitmaps in C.
@@ -181,12 +1084,16 @@ typedef struct roaring_statistics_s {
#define ROARING_H
#include <stdbool.h>
-#include <stdint.h>
#include <stddef.h> // for `size_t`
+#include <stdint.h>
+
+// Include other headers after roaring_types.h
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
#endif
typedef struct roaring_bitmap_s {
@@ -206,8 +1113,9 @@ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
* Returns NULL if the allocation fails.
* Client is responsible for calling `roaring_bitmap_free()`.
*/
-static inline roaring_bitmap_t *roaring_bitmap_create(void)
- { return roaring_bitmap_create_with_capacity(0); }
+static inline roaring_bitmap_t *roaring_bitmap_create(void) {
+ return roaring_bitmap_create_with_capacity(0);
+}
/**
* Initialize a roaring bitmap structure in memory controlled by client.
@@ -221,13 +1129,14 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
* The bitmap will be in a "clear" state, with no auxiliary allocations.
* Since this performs no allocations, the function will not fail.
*/
-static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
- { roaring_bitmap_init_with_capacity(r, 0); }
+static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) {
+ roaring_bitmap_init_with_capacity(r, 0);
+}
/**
* Add all the values between min (included) and max (excluded) that are at a
* distance k*step from min.
-*/
+ */
roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
uint32_t step);
@@ -245,11 +1154,10 @@ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
* do so for all of your bitmaps, since interactions between bitmaps with and
* without COW is unsafe.
*/
-static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
+static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) {
return r->high_low_container.flags & ROARING_FLAG_COW;
}
-static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
- bool cow) {
+static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) {
if (cow) {
r->high_low_container.flags |= ROARING_FLAG_COW;
} else {
@@ -259,18 +1167,58 @@ static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
int64_t offset);
-
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/**
* Describe the inner structure of the bitmap.
*/
void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
-#endif
/**
* Creates a new bitmap from a list of uint32_t integers
+ *
+ * This function is deprecated, use `roaring_bitmap_from` instead, which
+ * doesn't require the number of elements to be passed in.
+ *
+ * @see roaring_bitmap_from
*/
-roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+
+#ifdef __cplusplus
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring_bitmap_of_ptr` function instead.
+ */
+// Use an immediately invoked closure, capturing by reference
+// (in case __VA_ARGS__ refers to context outside the closure)
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring_bitmap_from(...) \
+ [&]() { \
+ const uint32_t roaring_bitmap_from_array[] = {0, __VA_ARGS__}; \
+ return roaring_bitmap_of_ptr((sizeof(roaring_bitmap_from_array) / \
+ sizeof(roaring_bitmap_from_array[0])) - \
+ 1, \
+ &roaring_bitmap_from_array[1]); \
+ }()
+#else
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring_bitmap_of_ptr` function instead.
+ */
+// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof
+// expression, which is an unevaluated context, so it's even safe in the case
+// where expressions passed have side effects (roaring_bitmap_from(my_func(),
+// ++i))
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring_bitmap_from(...) \
+ roaring_bitmap_of_ptr( \
+ (sizeof((const uint32_t[]){0, __VA_ARGS__}) / sizeof(uint32_t)) - 1, \
+ &((const uint32_t[]){0, __VA_ARGS__})[1])
+#endif
/**
* Copies a bitmap (this does memory allocation).
@@ -285,16 +1233,18 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
*
* It might be preferable and simpler to call roaring_bitmap_copy except
* that roaring_bitmap_overwrite can save on memory allocations.
+ *
+ * Returns true if successful, or false if there was an error. On failure,
+ * the dest bitmap is left in a valid, empty state (even if it was not empty
+ * before).
*/
bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
const roaring_bitmap_t *src);
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/**
* Print the content of the bitmap.
*/
void roaring_bitmap_printf(const roaring_bitmap_t *r);
-#endif
/**
* Computes the intersection between two bitmaps and returns new bitmap. The
@@ -321,10 +1271,10 @@ bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2);
/**
- * Check whether a bitmap and a closed range intersect.
+ * Check whether a bitmap and an open range intersect.
*/
-bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
- uint64_t x, uint64_t y);
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x,
+ uint64_t y);
/**
* Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
@@ -502,15 +1452,15 @@ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
/**
* Add all values in range [min, max]
*/
-void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
- uint32_t min, uint32_t max);
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min,
+ uint32_t max);
/**
* Add all values in range [min, max)
*/
-static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
- uint64_t min, uint64_t max) {
- if(max == min) return;
+static inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min,
+ uint64_t max) {
+ if (max <= min) return;
roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
}
@@ -522,15 +1472,15 @@ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
/**
* Remove all values in range [min, max]
*/
-void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
- uint32_t min, uint32_t max);
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min,
+ uint32_t max);
/**
* Remove all values in range [min, max)
*/
-static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
- uint64_t min, uint64_t max) {
- if(max == min) return;
+static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min,
+ uint64_t max) {
+ if (max <= min) return;
roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
}
@@ -556,12 +1506,11 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
* to range_end (excluded) is present
*/
bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
- uint64_t range_start,
- uint64_t range_end);
+ uint64_t range_start, uint64_t range_end);
/**
- * Check if an items is present, using context from a previous insert for speed
- * optimization.
+ * Check if an item is present, using context from a previous insert or search
+ * for speed optimization.
*
* `context` will be used to store information between calls to make bulk
* operations faster. `*context` should be zero-initialized before the first
@@ -591,11 +1540,10 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
uint64_t range_end);
/**
-* Returns true if the bitmap is empty (cardinality is zero).
-*/
+ * Returns true if the bitmap is empty (cardinality is zero).
+ */
bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
-
/**
* Empties the bitmap. It will have no auxiliary allocations (so if the bitmap
* was initialized in client memory via roaring_bitmap_init(), then a call to
@@ -612,9 +1560,27 @@ void roaring_bitmap_clear(roaring_bitmap_t *r);
*/
void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
+/**
+ * Store the bitmap to a bitset. This can be useful for people
+ * who need the performance and simplicity of a standard bitset.
+ * We assume that the input bitset is originally empty (does not
+ * have any set bit).
+ *
+ * bitset_t * out = bitset_create();
+ * // if the bitset has content in it, call "bitset_clear(out)"
+ * bool success = roaring_bitmap_to_bitset(mybitmap, out);
+ * // on failure, success will be false.
+ * // You can then query the bitset:
+ * bool is_present = bitset_get(out, 10011 );
+ * // you must free the memory:
+ * bitset_free(out);
+ *
+ */
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);
/**
- * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
+ * Convert the bitmap to a sorted array from `offset` by `limit`, output in
+ * `ans`.
*
* Caller is responsible to ensure that there is enough memory allocated, e.g.
*
@@ -622,9 +1588,8 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
*
* Return false in case of failure (e.g., insufficient memory)
*/
-bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
- size_t offset, size_t limit,
- uint32_t *ans);
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
+ size_t limit, uint32_t *ans);
/**
* Remove run-length encoding even when it is more space efficient.
@@ -657,8 +1622,9 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
*
* Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
@@ -668,12 +1634,30 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
* (See `roaring_bitmap_portable_deserialize()` if you want a format that's
* compatible with Java and Go implementations).
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
/**
+ * Use with `roaring_bitmap_serialize()`.
+ *
+ * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ *
+ * The difference with `roaring_bitmap_deserialize()` is that this function
+ * checks that the input buffer is a valid bitmap. If the buffer is too small,
+ * NULL is returned.
+ */
+roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
+ size_t maxbytes);
+
+/**
* How many bytes are required to serialize this bitmap (NOT compatible
* with Java and Go versions)
*/
@@ -689,9 +1673,10 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
*
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
-*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
@@ -702,8 +1687,23 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * The function itself is safe in the sense that it will not cause buffer
+ * overflows. However, for correct operations, it is assumed that the bitmap
+ * read was once serialized from a valid bitmap (i.e., it follows the format
+ * specification). If you provided an incorrect input (garbage), then the bitmap
+ * read may not be in a valid state and following operations may not lead to
+ * sensible results. In particular, the serialized array containers need to be
+ * in sorted order, and the run containers should be in sorted non-overlapping
+ * order. This is guaranteed to happen when serializing an existing bitmap,
+ * but not for random inputs.
+ *
+ * You may use roaring_bitmap_internal_validate to check the validity of the
+ * bitmap prior to using it. You may also use other strategies to check for
+ * corrupted inputs (e.g., checksums).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
size_t maxbytes);
@@ -724,8 +1724,9 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
@@ -757,8 +1758,9 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
@@ -790,8 +1792,9 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
* Serializes bitmap using frozen format.
* Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
@@ -806,8 +1809,9 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
* Bitmap must be freed as usual, by calling roaring_bitmap_free().
* Underlying buffer must not be freed or modified while it backs any bitmaps.
*
- * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
- * the data format is going to be big-endian and not compatible with little-endian systems.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
*/
const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
size_t length);
@@ -950,6 +1954,27 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
/**
+ * roaring_bitmap_rank_many is a `Bulk` version of `roaring_bitmap_rank`
+ * it puts rank value of each element in `[begin .. end)` to `ans[]`
+ *
+ * the values in `[begin .. end)` must be sorted in Ascending order;
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ * ans = malloc((end-begin) * sizeof(uint64_t));
+ */
+void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans);
+
+/**
+ * Returns the index of x in the given roaring bitmap.
+ * If the roaring bitmap doesn't contain x, this function will return -1.
+ * The difference with rank function is that this function will return -1 when x
+ * is not the element of roaring bitmap, but the rank function will return a
+ * non-negative number.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x);
+
+/**
* Returns the smallest value in the set, or UINT32_MAX if the set is empty.
*/
uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
@@ -968,136 +1993,215 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
void roaring_bitmap_statistics(const roaring_bitmap_t *r,
roaring_statistics_t *stat);
+/**
+ * Perform internal consistency checks. Returns true if the bitmap is
+ * consistent. It may be useful to call this after deserializing bitmaps from
+ * untrusted sources. If roaring_bitmap_internal_validate returns true, then the
+ * bitmap should be consistent and can be trusted not to cause crashes or memory
+ * corruption.
+ *
+ * Note that some operations intentionally leave bitmaps in an inconsistent
+ * state temporarily, for example, `roaring_bitmap_lazy_*` functions, until
+ * `roaring_bitmap_repair_after_lazy` is called.
+ *
+ * If reason is non-null, it will be set to a string describing the first
+ * inconsistency found if any.
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r,
+ const char **reason);
+
/*********************
* What follows is code use to iterate through values in a roaring bitmap
roaring_bitmap_t *r =...
roaring_uint32_iterator_t i;
-roaring_create_iterator(r, &i);
+roaring_iterator_create(r, &i);
while(i.has_value) {
printf("value = %d\n", i.current_value);
- roaring_advance_uint32_iterator(&i);
+ roaring_uint32_iterator_advance(&i);
}
Obviously, if you modify the underlying bitmap, the iterator
becomes invalid. So don't.
*/
+/**
+ * A struct used to keep iterator state. Users should only access
+ * `current_value` and `has_value`, the rest of the type should be treated as
+ * opaque.
+ */
typedef struct roaring_uint32_iterator_s {
- const roaring_bitmap_t *parent; // owner
- int32_t container_index; // point to the current container index
- int32_t in_container_index; // for bitset and array container, this is out
- // index
- int32_t run_index; // for run container, this points at the run
+ const roaring_bitmap_t *parent; // Owner
+ const ROARING_CONTAINER_T *container; // Current container
+ uint8_t typecode; // Typecode of current container
+ int32_t container_index; // Current container index
+ uint32_t highbits; // High 16 bits of the current value
+ roaring_container_iterator_t container_it;
uint32_t current_value;
bool has_value;
-
- const ROARING_CONTAINER_T
- *container; // should be:
- // parent->high_low_container.containers[container_index];
- uint8_t typecode; // should be:
- // parent->high_low_container.typecodes[container_index];
- uint32_t highbits; // should be:
- // parent->high_low_container.keys[container_index]) <<
- // 16;
-
} roaring_uint32_iterator_t;
/**
- * Initialize an iterator object that can be used to iterate through the
- * values. If there is a value, then this iterator points to the first value
- * and `it->has_value` is true. The value is in `it->current_value`.
+ * Initialize an iterator object that can be used to iterate through the values.
+ * If there is a value, then this iterator points to the first value and
+ * `it->has_value` is true. The value is in `it->current_value`.
*/
-void roaring_init_iterator(const roaring_bitmap_t *r,
+void roaring_iterator_init(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit);
+/** DEPRECATED, use `roaring_iterator_init`. */
+CROARING_DEPRECATED static inline void roaring_init_iterator(
+ const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
+ roaring_iterator_init(r, newit);
+}
+
/**
- * Initialize an iterator object that can be used to iterate through the
- * values. If there is a value, then this iterator points to the last value
- * and `it->has_value` is true. The value is in `it->current_value`.
+ * Initialize an iterator object that can be used to iterate through the values.
+ * If there is a value, then this iterator points to the last value and
+ * `it->has_value` is true. The value is in `it->current_value`.
*/
-void roaring_init_iterator_last(const roaring_bitmap_t *r,
+void roaring_iterator_init_last(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit);
+/** DEPRECATED, use `roaring_iterator_init_last`. */
+CROARING_DEPRECATED static inline void roaring_init_iterator_last(
+ const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
+ roaring_iterator_init_last(r, newit);
+}
+
/**
* Create an iterator object that can be used to iterate through the values.
* Caller is responsible for calling `roaring_free_iterator()`.
*
- * The iterator is initialized (this function calls `roaring_init_iterator()`)
+ * The iterator is initialized (this function calls `roaring_iterator_init()`)
* If there is a value, then this iterator points to the first value and
* `it->has_value` is true. The value is in `it->current_value`.
*/
-roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
+roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r);
+
+/** DEPRECATED, use `roaring_iterator_create`. */
+CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
+roaring_create_iterator(const roaring_bitmap_t *r) {
+ return roaring_iterator_create(r);
+}
/**
-* Advance the iterator. If there is a new value, then `it->has_value` is true.
-* The new value is in `it->current_value`. Values are traversed in increasing
-* orders. For convenience, returns `it->has_value`.
-*/
-bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
+ * Advance the iterator. If there is a new value, then `it->has_value` is true.
+ * The new value is in `it->current_value`. Values are traversed in increasing
+ * order. For convenience, returns `it->has_value`.
+ *
+ * Once `it->has_value` is false, `roaring_uint32_iterator_advance` should not
+ * be called on the iterator again. Calling `roaring_uint32_iterator_previous`
+ * is allowed.
+ */
+bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_advance`. */
+CROARING_DEPRECATED static inline bool roaring_advance_uint32_iterator(
+ roaring_uint32_iterator_t *it) {
+ return roaring_uint32_iterator_advance(it);
+}
/**
-* Decrement the iterator. If there's a new value, then `it->has_value` is true.
-* The new value is in `it->current_value`. Values are traversed in decreasing
-* order. For convenience, returns `it->has_value`.
-*/
-bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
+ * Decrement the iterator. If there's a new value, then `it->has_value` is true.
+ * The new value is in `it->current_value`. Values are traversed in decreasing
+ * order. For convenience, returns `it->has_value`.
+ *
+ * Once `it->has_value` is false, `roaring_uint32_iterator_previous` should not
+ * be called on the iterator again. Calling `roaring_uint32_iterator_advance` is
+ * allowed.
+ */
+bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_previous`. */
+CROARING_DEPRECATED static inline bool roaring_previous_uint32_iterator(
+ roaring_uint32_iterator_t *it) {
+ return roaring_uint32_iterator_previous(it);
+}
/**
* Move the iterator to the first value >= `val`. If there is a such a value,
* then `it->has_value` is true. The new value is in `it->current_value`.
* For convenience, returns `it->has_value`.
*/
-bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it,
uint32_t val);
+/** DEPRECATED, use `roaring_uint32_iterator_move_equalorlarger`. */
+CROARING_DEPRECATED static inline bool
+roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+ uint32_t val) {
+ return roaring_uint32_iterator_move_equalorlarger(it, val);
+}
+
/**
* Creates a copy of an iterator.
* Caller must free it.
*/
-roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+roaring_uint32_iterator_t *roaring_uint32_iterator_copy(
const roaring_uint32_iterator_t *it);
+/** DEPRECATED, use `roaring_uint32_iterator_copy`. */
+CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
+roaring_copy_uint32_iterator(const roaring_uint32_iterator_t *it) {
+ return roaring_uint32_iterator_copy(it);
+}
+
/**
- * Free memory following `roaring_create_iterator()`
+ * Free memory following `roaring_iterator_create()`
*/
-void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
+void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it);
+
+/** DEPRECATED, use `roaring_uint32_iterator_free`. */
+CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
+ roaring_uint32_iterator_t *it) {
+ roaring_uint32_iterator_free(it);
+}
/*
* Reads next ${count} values from iterator into user-supplied ${buf}.
* Returns the number of read elements.
- * This number can be smaller than ${count}, which means that iterator is drained.
+ * This number can be smaller than ${count}, which means that iterator is
+ * drained.
*
* This function satisfies semantics of iteration and can be used together with
* other iterator functions.
* - first value is copied from ${it}->current_value
* - after function returns, iterator is positioned at the next element
*/
-uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
- uint32_t* buf, uint32_t count);
+uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
+ uint32_t *buf, uint32_t count);
+
+/** DEPRECATED, use `roaring_uint32_iterator_read`. */
+CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
+ roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) {
+ return roaring_uint32_iterator_read(it, buf, count);
+}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace api {
+}
+}
+} // extern "C" { namespace roaring { namespace api {
#endif
-#endif /* ROARING_H */
+#endif /* ROARING_H */
#ifdef __cplusplus
- /**
- * Best practices for C++ headers is to avoid polluting global scope.
- * But for C compatibility when just `roaring.h` is included building as
- * C++, default to global access for the C public API.
- *
- * BUT when `roaring.hh` is included instead, it sets this flag. That way
- * explicit namespacing must be used to get the C functions.
- *
- * This is outside the include guard so that if you include BOTH headers,
- * the order won't matter; you still get the global definitions.
- */
- #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
- using namespace ::roaring::api;
- #endif
+/**
+ * Best practices for C++ headers is to avoid polluting global scope.
+ * But for C compatibility when just `roaring.h` is included building as
+ * C++, default to global access for the C public API.
+ *
+ * BUT when `roaring.hh` is included instead, it sets this flag. That way
+ * explicit namespacing must be used to get the C functions.
+ *
+ * This is outside the include guard so that if you include BOTH headers,
+ * the order won't matter; you still get the global definitions.
+ */
+#if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
+using namespace ::roaring::api;
+#endif
#endif
/* end file include/roaring/roaring.h */
/* begin file include/roaring/memory.h */
@@ -1141,3 +2245,660 @@ void roaring_aligned_free(void*);
#endif // INCLUDE_ROARING_MEMORY_H_
/* end file include/roaring/memory.h */
+/* begin file include/roaring/roaring64.h */
+#ifndef ROARING64_H
+#define ROARING64_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace api {
+#endif
+
+typedef struct roaring64_bitmap_s roaring64_bitmap_t;
+typedef struct roaring64_leaf_s roaring64_leaf_t;
+typedef struct roaring64_iterator_s roaring64_iterator_t;
+
+/**
+ * A bit of context usable with `roaring64_bitmap_*_bulk()` functions.
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap (unless re-initialized to
+ * zero), and any modification to a bitmap (other than modifications performed
+ * with `_bulk()` functions with the context passed) will invalidate any
+ * contexts associated with that bitmap.
+ */
+typedef struct roaring64_bulk_context_s {
+ uint8_t high_bytes[6];
+ roaring64_leaf_t *leaf;
+} roaring64_bulk_context_t;
+
+/**
+ * Dynamically allocates a new bitmap (initially empty).
+ * Client is responsible for calling `roaring64_bitmap_free()`.
+ */
+roaring64_bitmap_t *roaring64_bitmap_create(void);
+void roaring64_bitmap_free(roaring64_bitmap_t *r);
+
+/**
+ * Returns a copy of a bitmap.
+ */
+roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r);
+
+/**
+ * Creates a new bitmap from a pointer to N 64-bit integers.
+ */
+roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,
+ const uint64_t *vals);
+
+#ifdef __cplusplus
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring64_bitmap_of_ptr` function instead.
+ */
+// Use an immediately invoked closure, capturing by reference
+// (in case __VA_ARGS__ refers to context outside the closure)
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring64_bitmap_from(...) \
+ [&]() { \
+ const uint64_t roaring64_bitmap_from_array[] = {0, __VA_ARGS__}; \
+ return roaring64_bitmap_of_ptr( \
+ (sizeof(roaring64_bitmap_from_array) / \
+ sizeof(roaring64_bitmap_from_array[0])) - \
+ 1, \
+ &roaring64_bitmap_from_array[1]); \
+ }()
+#else
+/**
+ * Creates a new bitmap which contains all values passed in as arguments.
+ *
+ * To create a bitmap from a variable number of arguments, use the
+ * `roaring64_bitmap_of_ptr` function instead.
+ */
+// While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof
+// expression, which is an unevaluated context, so it's even safe in the case
+// where expressions passed have side effects (roaring64_bitmap_from(my_func(),
+// ++i))
+// Include a 0 at the beginning of the array to make the array length > 0
+// (zero sized arrays are not valid in standard c/c++)
+#define roaring64_bitmap_from(...) \
+ roaring64_bitmap_of_ptr( \
+ (sizeof((const uint64_t[]){0, __VA_ARGS__}) / sizeof(uint64_t)) - 1, \
+ &((const uint64_t[]){0, __VA_ARGS__})[1])
+#endif
+
+/**
+ * Create a new bitmap containing all the values in [min, max) that are at a
+ * distance k*step from min.
+ */
+roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max,
+ uint64_t step);
+
+/**
+ * Adds the provided value to the bitmap.
+ */
+void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Adds the provided value to the bitmap.
+ * Returns true if a new value was added, false if the value already existed.
+ */
+bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Add an item, using context from a previous insert for faster insertion.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `_bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r,
+ roaring64_bulk_context_t *context, uint64_t val);
+
+/**
+ * Add `n_args` values from `vals`, faster than repeatedly calling
+ * `roaring64_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same high 48 bits of the value as consecutive elements in
+ * `vals`.
+ */
+void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args,
+ const uint64_t *vals);
+
+/**
+ * Add all values in range [min, max).
+ */
+void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+
+/**
+ * Add all values in range [min, max].
+ */
+void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+
+/**
+ * Removes a value from the bitmap if present.
+ */
+void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Removes a value from the bitmap if present, returns true if the value was
+ * removed and false if the value was not present.
+ */
+bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Remove an item, using context from a previous insert for faster removal.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `_bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r,
+ roaring64_bulk_context_t *context,
+ uint64_t val);
+
+/**
+ * Remove `n_args` values from `vals`, faster than repeatedly calling
+ * `roaring64_bitmap_remove()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same high 48 bits of the value as consecutive elements in
+ * `vals`.
+ */
+void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args,
+ const uint64_t *vals);
+
+/**
+ * Remove all values in range [min, max).
+ */
+void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+
+/**
+ * Remove all values in range [min, max].
+ */
+void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+
+/**
+ * Returns true if the provided value is present.
+ */
+bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Returns true if all values in the range [min, max) are present.
+ */
+bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+
+/**
+ * Check if an item is present using context from a previous insert or search
+ * for faster search.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `_bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same high 48 bits of the value consecutively.
+ */
+bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r,
+ roaring64_bulk_context_t *context,
+ uint64_t val);
+
+/**
+ * Selects the element at index 'rank' where the smallest element is at index 0.
+ * If the size of the bitmap is strictly greater than rank, then this function
+ * returns true and sets element to the element of given rank. Otherwise, it
+ * returns false.
+ */
+bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank,
+ uint64_t *element);
+
+/**
+ * Returns the number of integers that are smaller or equal to x. Thus if x is
+ * the first element, this function will return 1. If x is smaller than the
+ * smallest element, this function will return 0.
+ *
+ * The indexing convention differs between roaring64_bitmap_select and
+ * roaring64_bitmap_rank: roaring64_bitmap_select refers to the smallest value
+ * as having index 0, whereas roaring64_bitmap_rank returns 1 when ranking
+ * the smallest value.
+ */
+uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val);
+
+/**
+ * Returns true if the given value is in the bitmap, and sets `out_index` to the
+ * (0-based) index of the value in the bitmap. Returns false if the value is not
+ * in the bitmap.
+ */
+bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val,
+ uint64_t *out_index);
+
+/**
+ * Returns the number of values in the bitmap.
+ */
+uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the number of elements in the range [min, max).
+ */
+uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r,
+ uint64_t min, uint64_t max);
+
+/**
+ * Returns true if the bitmap is empty (cardinality is zero).
+ */
+bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the smallest value in the set, or UINT64_MAX if the set is empty.
+ */
+uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r);
+
+/**
+ * Returns the largest value in the set, or 0 if empty.
+ */
+uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r);
+
+/**
+ * Returns true if the result has at least one run container.
+ */
+bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r);
+
+/**
+ * Perform internal consistency checks.
+ *
+ * Returns true if the bitmap is consistent. It may be useful to call this
+ * after deserializing bitmaps from untrusted sources. If
+ * roaring64_bitmap_internal_validate returns true, then the bitmap is
+ * consistent and can be trusted not to cause crashes or memory corruption.
+ *
+ * If reason is non-null, it will be set to a string describing the first
+ * inconsistency found if any.
+ */
+bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r,
+ const char **reason);
+
+/**
+ * Return true if the two bitmaps contain the same elements.
+ */
+bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2.
+ */
+bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2, and r2 is strictly
+ * greater than r1.
+ */
+bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the intersection between two bitmaps and returns new bitmap. The
+ * caller is responsible for free-ing the result.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may
+ * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary
+ * bitmaps.
+ */
+roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the intersection between two bitmaps.
+ */
+uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_and()`, modifies `r1`. `r1` and `r2`
+ * are allowed to be equal.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmaps.
+ */
+void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Check whether two bitmaps intersect.
+ */
+bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Check whether a bitmap intersects the range [min, max).
+ */
+bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r,
+ uint64_t min, uint64_t max);
+
+/**
+ * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
+ * distance, or the Jaccard similarity coefficient)
+ *
+ * The Jaccard index is undefined if both bitmaps are empty.
+ */
+double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the union between two bitmaps.
+ */
+uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_or()`, modifies `r1`.
+ */
+void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the symmetric difference (xor) between two bitmaps and returns a new
+ * bitmap. The caller is responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the symmetric difference (xor) between two bitmaps.
+ */
+uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_xor()`, modifies `r1`. `r1` and `r2`
+ * are not allowed to be equal (that would result in an empty bitmap).
+ */
+void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the difference (andnot) between two bitmaps and returns a new
+ * bitmap. The caller is responsible for free-ing the result.
+ */
+roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Computes the size of the difference (andnot) between two bitmaps.
+ */
+uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * In-place version of `roaring64_bitmap_andnot()`, modifies `r1`. `r1` and `r2`
+ * are not allowed to be equal (that would result in an empty bitmap).
+ */
+void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2);
+
+/**
+ * Compute the negation of the bitmap in the interval [min, max).
+ * The number of negated values is `max - min`. Areas outside the range are
+ * passed through unchanged.
+ */
+roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r,
+ uint64_t min, uint64_t max);
+
+/**
+ * Compute the negation of the bitmap in the interval [min, max].
+ * The number of negated values is `max - min + 1`. Areas outside the range are
+ * passed through unchanged.
+ */
+roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r,
+ uint64_t min, uint64_t max);
+
+/**
+ * In-place version of `roaring64_bitmap_flip`. Compute the negation of the
+ * bitmap in the interval [min, max). The number of negated values is `max -
+ * min`. Areas outside the range are passed through unchanged.
+ */
+void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+/**
+ * In-place version of `roaring64_bitmap_flip_closed`. Compute the negation of
+ * the bitmap in the interval [min, max]. The number of negated values is `max -
+ * min + 1`. Areas outside the range are passed through unchanged.
+ */
+void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max);
+/**
+ * How many bytes are required to serialize this bitmap.
+ *
+ * This is meant to be compatible with other languages:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ */
+size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
+
+/**
+ * Write a bitmap to a buffer. The output buffer should refer to at least
+ * `roaring64_bitmap_portable_size_in_bytes(r)` bytes of allocated memory.
+ *
+ * Returns how many bytes were written, which should match
+ * `roaring64_bitmap_portable_size_in_bytes(r)`.
+ *
+ * This is meant to be compatible with other languages:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ */
+size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
+ char *buf);
+/**
+ * Check how many bytes would be read (up to maxbytes) at this pointer if there
+ * is a valid bitmap, returns zero if there is no valid bitmap.
+ *
+ * This is meant to be compatible with other languages
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ */
+size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
+ size_t maxbytes);
+
+/**
+ * Read a bitmap from a serialized buffer safely (reading up to maxbytes).
+ * In case of failure, NULL is returned.
+ *
+ * This is meant to be compatible with other languages
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ *
+ * The function itself is safe in the sense that it will not cause buffer
+ * overflows. However, for correct operations, it is assumed that the bitmap
+ * read was once serialized from a valid bitmap (i.e., it follows the format
+ * specification). If you provided an incorrect input (garbage), then the bitmap
+ * read may not be in a valid state and following operations may not lead to
+ * sensible results. In particular, the serialized array containers need to be
+ * in sorted order, and the run containers should be in sorted non-overlapping
+ * order. This is guaranteed to happen when serializing an existing bitmap,
+ * but not for random inputs.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a
+ * mainframe IBM s390x), the data format is going to be big-endian and not
+ * compatible with little-endian systems.
+ */
+roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf,
+ size_t maxbytes);
+
+/**
+ * Iterate over the bitmap elements. The function `iterator` is called once for
+ * all the values with `ptr` (can be NULL) as the second parameter of each call.
+ *
+ * `roaring_iterator64` is simply a pointer to a function that returns a bool
+ * and takes `(uint64_t, void*)` as inputs. True means that the iteration should
+ * continue, while false means that it should stop.
+ *
+ * Returns true if the `iterator` callback returned true throughout (so that
+ * all data points were necessarily visited).
+ *
+ * Iteration is ordered from the smallest to the largest elements.
+ */
+bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
+ roaring_iterator64 iterator, void *ptr);
+
+/**
+ * Convert the bitmap to a sorted array `out`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ * ```
+ * out = malloc(roaring64_bitmap_get_cardinality(bitmap) * sizeof(uint64_t));
+ * ```
+ */
+void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
+ uint64_t *out);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring64_iterator_free()`.
+ *
+ * The iterator is initialized. If there is a value, then this iterator points
+ * to the first value and `roaring64_iterator_has_value()` returns true. The
+ * value can be retrieved with `roaring64_iterator_value()`.
+ */
+roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring64_iterator_free()`.
+ *
+ * The iterator is initialized. If there is a value, then this iterator points
+ * to the last value and `roaring64_iterator_has_value()` returns true. The
+ * value can be retrieved with `roaring64_iterator_value()`.
+ */
+roaring64_iterator_t *roaring64_iterator_create_last(
+ const roaring64_bitmap_t *r);
+
+/**
+ * Re-initializes an existing iterator. Functionally the same as
+ * `roaring64_iterator_create` without an allocation.
+ */
+void roaring64_iterator_reinit(const roaring64_bitmap_t *r,
+ roaring64_iterator_t *it);
+
+/**
+ * Re-initializes an existing iterator. Functionally the same as
+ * `roaring64_iterator_create_last` without an allocation.
+ */
+void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r,
+ roaring64_iterator_t *it);
+
+/**
+ * Creates a copy of the iterator. Caller is responsible for calling
+ * `roaring64_iterator_free()` on the resulting iterator.
+ */
+roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it);
+
+/**
+ * Free the iterator.
+ */
+void roaring64_iterator_free(roaring64_iterator_t *it);
+
+/**
+ * Returns true if the iterator currently points to a value. If so, calling
+ * `roaring64_iterator_value()` returns the value.
+ */
+bool roaring64_iterator_has_value(const roaring64_iterator_t *it);
+
+/**
+ * Returns the value the iterator currently points to. Should only be called if
+ * `roaring64_iterator_has_value()` returns true.
+ */
+uint64_t roaring64_iterator_value(const roaring64_iterator_t *it);
+
+/**
+ * Advance the iterator. If there is a new value, then
+ * `roaring64_iterator_has_value()` returns true. Values are traversed in
+ * increasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ *
+ * Once this returns false, `roaring64_iterator_advance` should not be called on
+ * the iterator again. Calling `roaring64_iterator_previous` is allowed.
+ */
+bool roaring64_iterator_advance(roaring64_iterator_t *it);
+
+/**
+ * Decrement the iterator. If there is a new value, then
+ * `roaring64_iterator_has_value()` returns true. Values are traversed in
+ * decreasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ *
+ * Once this returns false, `roaring64_iterator_previous` should not be called
+ * on the iterator again. Calling `roaring64_iterator_advance` is allowed.
+ */
+bool roaring64_iterator_previous(roaring64_iterator_t *it);
+
+/**
+ * Move the iterator to the first value greater than or equal to `val`, if it
+ * exists at or after the current position of the iterator. If there is a new
+ * value, then `roaring64_iterator_has_value()` returns true. Values are
+ * traversed in increasing order. For convenience, returns the result of
+ * `roaring64_iterator_has_value()`.
+ */
+bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it,
+ uint64_t val);
+
+/**
+ * Reads up to `count` values from the iterator into the given `buf`. Returns
+ * the number of elements read. The number of elements read can be smaller than
+ * `count`, which means that there are no more elements in the bitmap.
+ *
+ * This function can be used together with other iterator functions.
+ */
+uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,
+ uint64_t count);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace roaring
+} // namespace api
+#endif
+
+#endif /* ROARING64_H */
+/* end file include/roaring/roaring64.h */
+#endif
diff --git a/src/lib/third_party/include/roaring_v2.h b/src/lib/third_party/include/roaring_v2.h
new file mode 100644
index 000000000..500ba9cb9
--- /dev/null
+++ b/src/lib/third_party/include/roaring_v2.h
@@ -0,0 +1,1143 @@
+// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
+// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+
+/*
+ * The CRoaring project is under a dual license (Apache/MIT).
+ * Users of the library may choose one or the other license.
+ */
+/*
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/*
+ * MIT License
+ *
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Permission is hereby granted, free of charge, to any
+ * person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software
+ * is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+ * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+ * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+/* begin file include/roaring/roaring_version.h */
+// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
+#ifndef ROARING_INCLUDE_ROARING_VERSION
+#define ROARING_INCLUDE_ROARING_VERSION
+#define ROARING_VERSION "0.9.6"
+enum { // numeric components of the ROARING_VERSION string "0.9.6"
+ ROARING_VERSION_MAJOR = 0, // first component of ROARING_VERSION
+ ROARING_VERSION_MINOR = 9, // second component of ROARING_VERSION
+ ROARING_VERSION_REVISION = 6 // third component of ROARING_VERSION
+};
+#endif // ROARING_INCLUDE_ROARING_VERSION
+/* end file include/roaring/roaring_version.h */
+/* begin file include/roaring/roaring_types.h */
+/*
+ Typedefs used by various components
+*/
+
+#ifndef ROARING_TYPES_H
+#define ROARING_TYPES_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace api {
+#endif
+
+
+/**
+ * When building .c files as C++, there's added compile-time checking if the
+ * container types are derived from a `container_t` base class. So long as
+ * such a base class is empty, the struct will behave compatibly with C structs
+ * despite the derivation. This is due to the Empty Base Class Optimization:
+ *
+ * https://en.cppreference.com/w/cpp/language/ebo
+ *
+ * But since C isn't namespaced, taking `container_t` globally might collide
+ * with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal
+ * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
+ */
+#if defined(__cplusplus)
+ extern "C++" {
+ struct container_s {};
+ }
+ #define ROARING_CONTAINER_T ::roaring::api::container_s
+#else
+ #define ROARING_CONTAINER_T void // no compile-time checking
+#endif
+
+#define ROARING_FLAG_COW UINT8_C(0x1)
+#define ROARING_FLAG_FROZEN UINT8_C(0x2)
+
+/**
+ * Roaring arrays are array-based key-value pairs having containers as values
+ * and 16-bit integer keys. A roaring bitmap might be implemented as such.
+ */
+
+// parallel arrays. Element sizes quite different.
+// Alternative is array
+// of structs. Which would have better
+// cache performance through binary searches?
+
+typedef struct roaring_array_s {
+ int32_t size; // presumably the number of entries in use — TODO confirm
+ int32_t allocation_size; // presumably the allocated capacity of the arrays below — TODO confirm
+ ROARING_CONTAINER_T **containers; // Use container_t in non-API files!
+ uint16_t *keys; // 16-bit keys, kept parallel to `containers`
+ uint8_t *typecodes; // presumably one container-type tag per entry — TODO confirm
+ uint8_t flags; // bit set of ROARING_FLAG_* values (e.g. ROARING_FLAG_COW)
+} roaring_array_t;
+
+
+typedef bool (*roaring_iterator)(uint32_t value, void *param);
+typedef bool (*roaring_iterator64)(uint64_t value, void *param);
+
+/**
+* (For advanced users.)
+* The roaring_statistics_t can be used to collect detailed statistics about
+* the composition of a roaring bitmap.
+*/
+typedef struct roaring_statistics_s {
+ uint32_t n_containers; /* number of containers */
+
+ uint32_t n_array_containers; /* number of array containers */
+ uint32_t n_run_containers; /* number of run containers */
+ uint32_t n_bitset_containers; /* number of bitset (bitmap) containers */
+
+ uint32_t
+ n_values_array_containers; /* number of values in array containers */
+ uint32_t n_values_run_containers; /* number of values in run containers */
+ uint32_t
+ n_values_bitset_containers; /* number of values in bitset (bitmap) containers */
+
+ uint32_t n_bytes_array_containers; /* number of allocated bytes in array
+ containers */
+ uint32_t n_bytes_run_containers; /* number of allocated bytes in run
+ containers */
+ uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitset
+ (bitmap) containers */
+
+ uint32_t
+ max_value; /* the maximal value, undefined if cardinality is zero */
+ uint32_t
+ min_value; /* the minimal value, undefined if cardinality is zero */
+ uint64_t sum_value; /* the sum of all values (could be used to compute
+ average) */
+
+ uint64_t cardinality; /* total number of values stored in the bitmap */
+
+ // NOTE(review): leftover note "and n_values_arrays, n_values_rle, n_values_bitmap"; presumably superseded by the n_values_* fields above — confirm
+} roaring_statistics_t;
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif /* ROARING_TYPES_H */
+/* end file include/roaring/roaring_types.h */
+/* begin file include/roaring/roaring.h */
+/*
+ * An implementation of Roaring Bitmaps in C.
+ */
+
+#ifndef ROARING_H
+#define ROARING_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h> // for `size_t`
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace api {
+#endif
+
+typedef struct roaring_bitmap_s {
+ roaring_array_t high_low_container; // key/container array backing the bitmap; its `flags` carry the COW bit
+} roaring_bitmap_t;
+
+/**
+ * Dynamically allocates a new bitmap (initially empty).
+ * Returns NULL if the allocation fails.
+ * Capacity is a performance hint for how many "containers" the data will need.
+ * Client is responsible for calling `roaring_bitmap_free()`.
+ */
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
+
+/**
+ * Heap-allocate a new, empty bitmap without any capacity hint.
+ * Yields NULL when the allocation fails; release the result with
+ * `roaring_bitmap_free()`.
+ */
+static inline roaring_bitmap_t *roaring_bitmap_create(void) {
+    return roaring_bitmap_create_with_capacity(0); }
+
+/**
+ * Initialize a roaring bitmap structure in memory controlled by client.
+ * Capacity is a performance hint for how many "containers" the data will need.
+ * Can return false if auxiliary allocations fail when capacity greater than 0.
+ */
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
+
+/**
+ * Put a client-owned bitmap structure into the "clear" (empty) state by
+ * initializing it with zero capacity, i.e. with no auxiliary allocations.
+ * Nothing is allocated, so the call cannot fail and its result is discarded.
+ */
+static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) {
+    (void)roaring_bitmap_init_with_capacity(r, 0); }
+
+/**
+ * Add all the values between min (included) and max (excluded) that are at a
+ * distance k*step from min.
+*/
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+ uint32_t step);
+
+/**
+ * Creates a new bitmap from a pointer of uint32_t integers
+ */
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
+
+/*
+ * Copy-on-write (COW) switch, stored as the ROARING_FLAG_COW bit of the
+ * bitmap's flags. COW saves memory and avoids copies, but needs more care in
+ * a threaded context. Most users should ignore this flag.
+ *
+ * Note: if you enable COW, enable it for all of your bitmaps, since
+ * interactions between bitmaps with and without COW are unsafe.
+ */
+static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
+    const uint8_t cow_bit = r->high_low_container.flags & ROARING_FLAG_COW;
+    return cow_bit != 0;
+}
+static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
+                                                    bool cow) {
+    /* Start from the current flags and touch only the COW bit. */
+    uint8_t updated = r->high_low_container.flags;
+    updated = cow ? (uint8_t)(updated | ROARING_FLAG_COW)
+                  : (uint8_t)(updated & ~ROARING_FLAG_COW);
+    r->high_low_container.flags = updated;
+}
+
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+ int64_t offset);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Describe the inner structure of the bitmap.
+ */
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
+#endif
+
+/**
+ * Creates a new bitmap from a list of uint32_t integers
+ */
+roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
+
+/**
+ * Copies a bitmap (this does memory allocation).
+ * The caller is responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
+
+/**
+ * Copies a bitmap from src to dest. It is assumed that the pointer dest
+ * is to an already allocated bitmap. The content of the dest bitmap is
+ * freed/deleted.
+ *
+ * It might be preferable and simpler to call roaring_bitmap_copy except
+ * that roaring_bitmap_overwrite can save on memory allocations.
+ */
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+ const roaring_bitmap_t *src);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Print the content of the bitmap.
+ */
+void roaring_bitmap_printf(const roaring_bitmap_t *r);
+#endif
+
+/**
+ * Computes the intersection between two bitmaps and returns new bitmap. The
+ * caller is responsible for memory management.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ * You may also rely on roaring_bitmap_and_inplace to avoid creating
+ * many temporary bitmaps.
+ */
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the intersection between two bitmaps.
+ */
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Check whether two bitmaps intersect.
+ */
+bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Check whether a bitmap and a closed range intersect.
+ */
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+ uint64_t x, uint64_t y);
+
+/**
+ * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
+ * distance, or the Jaccard similarity coefficient)
+ *
+ * The Jaccard index is undefined if both bitmaps are empty.
+ */
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the union between two bitmaps.
+ */
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the difference (andnot) between two bitmaps.
+ */
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Computes the size of the symmetric difference (xor) between two bitmaps.
+ */
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of `roaring_bitmap_and()`, modifies r1
+ * r1 == r2 is allowed.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ */
+void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of `roaring_bitmap_or()`, modifies r1.
+ * TODO: decide whether r1 == r2 ok
+ */
+void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Compute the union of 'number' bitmaps.
+ * Caller is responsible for freeing the result.
+ * See also `roaring_bitmap_or_many_heap()`
+ */
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+ const roaring_bitmap_t **rs);
+
+/**
+ * Compute the union of 'number' bitmaps using a heap. This can sometimes be
+ * faster than `roaring_bitmap_or_many()` which uses a naive algorithm.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+ const roaring_bitmap_t **rs);
+
+/**
+ * Computes the symmetric difference (xor) between two bitmaps
+ * and returns new bitmap. The caller is responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2.
+ */
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+ const roaring_bitmap_t **rs);
+
+/**
+ * Computes the difference (andnot) between two bitmaps and returns new bitmap.
+ * Caller is responsible for freeing the result.
+ */
+roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2.
+ */
+void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * TODO: consider implementing:
+ *
+ * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be
+ * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is
+ * responsible for freeing the result."
+ *
+ * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number,
+ * const roaring_bitmap_t **rs);
+ */
+
+/**
+ * Frees the memory.
+ */
+void roaring_bitmap_free(const roaring_bitmap_t *r);
+
+/**
+ * A bit of context usable with `roaring_bitmap_*_bulk()` functions
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap
+ * (unless re-initialized to zero), and any modification to a bitmap
+ * (other than modifications performed with `_bulk()` functions with the context
+ * passed) will invalidate any contexts associated with that bitmap.
+ */
+typedef struct roaring_bulk_context_s {
+ ROARING_CONTAINER_T *container; // presumably the container used by the previous bulk call — TODO confirm
+ int idx; // presumably that container's index in the bitmap's array — TODO confirm
+ uint16_t key; // "key" = high 16 bits of a value (see the bulk function docs)
+ uint8_t typecode; // presumably the container's type tag — TODO confirm
+} roaring_bulk_context_t;
+
+/**
+ * Add an item, using context from a previous insert for speed optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `_bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context, uint32_t val);
+
+/**
+ * Add `n_args` values from pointer `vals`, faster than repeatedly calling
+ * `roaring_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same "key" (high 16 bits of the value) as consecutive
+ * elements in `vals`
+ */
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals);
+
+/**
+ * Add value x
+ */
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Add value x
+ * Returns true if a new value was added, false if the value already existed.
+ */
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Add all values in range [min, max]
+ */
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
+ uint32_t min, uint32_t max);
+
+/**
+ * Add all values in [min, max), clipped to 32 bits; no-op when max <= min.
+ */
+static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
+                                            uint64_t min, uint64_t max) {
+    if(max <= min || min > UINT32_MAX) return; /* empty, inverted, or nothing representable */
+    roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max > UINT32_MAX ? UINT32_MAX : max - 1));
+}
+
+/**
+ * Remove value x
+ */
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Remove all values in range [min, max]
+ */
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
+ uint32_t min, uint32_t max);
+
+/**
+ * Remove all values in [min, max), clipped to 32 bits; no-op when max <= min.
+ */
+static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
+                                               uint64_t min, uint64_t max) {
+    if(max <= min || min > UINT32_MAX) return; /* empty, inverted, or nothing representable */
+    roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max > UINT32_MAX ? UINT32_MAX : max - 1));
+}
+
+/**
+ * Remove multiple values
+ */
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals);
+
+/**
+ * Remove value x
+ * Returns true if the value was removed, false if it was not present.
+ */
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Check if value is present
+ */
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
+
+/**
+ * Check whether a range of values from range_start (included)
+ * to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
+ uint64_t range_start,
+ uint64_t range_end);
+
+/**
+ * Check if an item is present, using context from a previous insert for speed
+ * optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `_bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val);
+
+/**
+ * Get the cardinality of the bitmap (number of elements).
+ */
+uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r);
+
+/**
+ * Returns the number of elements in the range [range_start, range_end).
+ */
+uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+ uint64_t range_start,
+ uint64_t range_end);
+
+/**
+* Returns true if the bitmap is empty (cardinality is zero).
+*/
+bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
+
+
+/**
+ * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap
+ * was initialized in client memory via roaring_bitmap_init_cleared(), then a call to
+ * roaring_bitmap_clear() would be enough to "free" it)
+ */
+void roaring_bitmap_clear(roaring_bitmap_t *r);
+
+/**
+ * Convert the bitmap to a sorted array, output in `ans`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t));
+ */
+void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
+
+
+/**
+ * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
+ *
+ * Caller is responsible to ensure that there is enough memory allocated, e.g.
+ *
+ * ans = malloc(limit * sizeof(uint32_t));
+ *
+ * Return false in case of failure (e.g., insufficient memory)
+ */
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+ size_t offset, size_t limit,
+ uint32_t *ans);
+
+/**
+ * Remove run-length encoding even when it is more space efficient.
+ * Return whether a change was applied.
+ */
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
+
+/**
+ * Convert array and bitmap containers to run containers when it is more
+ * efficient; also convert from run containers when more space efficient.
+ *
+ * Returns true if the result has at least one run container.
+ * Additional savings might be possible by calling `roaring_bitmap_shrink_to_fit()`.
+ */
+bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
+
+/**
+ * If needed, reallocate memory to shrink the memory usage.
+ * Returns the number of bytes saved.
+ */
+size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
+
+/**
+ * Write the bitmap to an output pointer, this output buffer should refer to
+ * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes.
+ *
+ * See `roaring_bitmap_portable_serialize()` if you want a format that's
+ * compatible with Java and Go implementations. This format can sometimes be
+ * more space efficient than the portable form, e.g. when the data is sparse.
+ *
+ * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
+
+/**
+ * Use with `roaring_bitmap_serialize()`.
+ *
+ * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
+
+/**
+ * How many bytes are required to serialize this bitmap (NOT compatible
+ * with Java and Go versions)
+ */
+size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * This function is unsafe in the sense that if there is no valid serialized
+ * bitmap at the pointer, then many bytes could be read, possibly causing a
+ * buffer overflow. See also roaring_bitmap_portable_deserialize_safe().
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+*
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
+
+/**
+ * Read bitmap from a serialized buffer safely (reading up to maxbytes).
+ * In case of failure, NULL is returned.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
+ size_t maxbytes);
+
+/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * The function is unsafe in the following ways:
+ * 1) It may execute unaligned memory accesses.
+ * 2) A buffer overflow may occur if buf does not point to a valid serialized
+ * bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
+
+/**
+ * Check how many bytes would be read (up to maxbytes) at this pointer if there
+ * is a bitmap, returns zero if there is no valid bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ */
+size_t roaring_bitmap_portable_deserialize_size(const char *buf,
+ size_t maxbytes);
+
+/**
+ * How many bytes are required to serialize this bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ */
+size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Write a bitmap to a char buffer. The output buffer should refer to at least
+ * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory.
+ *
+ * Returns how many bytes were written which should match
+ * `roaring_bitmap_portable_size_in_bytes(r)`.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
+
+/*
+ * "Frozen" serialization format imitates memory layout of roaring_bitmap_t.
+ * Deserialized bitmap is a constant view of the underlying buffer.
+ * This significantly reduces amount of allocations and copying required during
+ * deserialization.
+ * It can be used with memory mapped files.
+ * Example can be found in benchmarks/frozen_benchmark.c
+ *
+ * [#####] const roaring_bitmap_t *
+ * | | |
+ * +----+ | +-+
+ * | | |
+ * [#####################################] underlying buffer
+ *
+ * Note that because frozen serialization format imitates C memory layout
+ * of roaring_bitmap_t, it is not fixed. It is different on big/little endian
+ * platforms and can be changed in future.
+ */
+
+/**
+ * Returns number of bytes required to serialize bitmap using frozen format.
+ */
+size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
+
+/**
+ * Serializes bitmap using frozen format.
+ * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
+
+/**
+ * Creates constant bitmap that is a view of a given buffer.
+ * Buffer data should have been written by `roaring_bitmap_frozen_serialize()`
+ * Its beginning must also be aligned by 32 bytes.
+ * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ */
+const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
+ size_t length);
+
+/**
+ * Iterate over the bitmap elements. The function `iterator` is called once for
+ * each value, with `ptr` (can be NULL) as the second parameter of each call.
+ *
+ * `roaring_iterator` is simply a pointer to a function that returns bool
+ * (true means that the iteration should continue while false means that it
+ * should stop), and takes (uint32_t,void*) as inputs.
+ *
+ * Returns true if the roaring_iterator returned true throughout (so that all
+ * data points were necessarily visited).
+ *
+ * Iteration is ordered: from the smallest to the largest elements.
+ */
+bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+ void *ptr);
+
+bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+ uint64_t high_bits, void *ptr);
+
+/**
+ * Return true if the two bitmaps contain the same elements.
+ */
+bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2.
+ */
+bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Return true if all the elements of r1 are also in r2, and r2 is strictly
+ * greater than r1.
+ */
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for memory management.
+ *
+ * The lazy version defers some computations such as the maintenance of the
+ * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
+ * after executing "lazy" computations.
+ *
+ * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result.
+ *
+ * `bitsetconversion` is a flag which determines whether container-container
+ * operations force a bitset conversion.
+ */
+roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2,
+ const bool bitsetconversion);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Inplace version of roaring_bitmap_lazy_or, modifies r1.
+ *
+ * `bitsetconversion` is a flag which determines whether container-container
+ * operations force a bitset conversion.
+ */
+void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2,
+ const bool bitsetconversion);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()`
+ * or modified with `roaring_bitmap_lazy_or_inplace()`.
+ */
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
+
+/**
+ * Computes the symmetric difference between two bitmaps and returns new bitmap.
+ * The caller is responsible for memory management.
+ *
+ * The lazy version defers some computations such as the maintenance of the
+ * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
+ * after executing "lazy" computations.
+ *
+ * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on
+ * the result.
+ */
+roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * (For expert users who seek high performance.)
+ *
+ * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2
+ */
+void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2);
+
+/**
+ * Compute the negation of the bitmap in the interval [range_start, range_end).
+ * The number of negated values is range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
+ uint64_t range_start, uint64_t range_end);
+
+/**
+ * compute (in place) the negation of the roaring bitmap within a specified
+ * interval: [range_start, range_end). The number of negated values is
+ * range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start,
+ uint64_t range_end);
+
+/**
+ * Selects the element at index 'rank' where the smallest element is at index 0.
+ * If the size of the roaring bitmap is strictly greater than rank, then this
+ * function returns true and sets element to the element of given rank.
+ * Otherwise, it returns false.
+ */
+bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
+ uint32_t *element);
+
+/**
+ * roaring_bitmap_rank returns the number of integers that are smaller or equal
+ * to x. Thus if x is the first element, this function will return 1. If
+ * x is smaller than the smallest element, this function will return 0.
+ *
+ * The indexing convention differs between roaring_bitmap_select and
+ * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value
+ * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
+ * the smallest value.
+ */
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
+
+/**
+ * Returns the smallest value in the set, or UINT32_MAX if the set is empty.
+ */
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
+
+/**
+ * Returns the greatest value in the set, or 0 if the set is empty.
+ */
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
+
+/**
+ * (For advanced users.)
+ *
+ * Collect statistics about the bitmap, see roaring_types.h for
+ * a description of roaring_statistics_t
+ */
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+ roaring_statistics_t *stat);
+
+/*********************
+* What follows is code used to iterate through values in a roaring bitmap
+
+roaring_bitmap_t *r =...
+roaring_uint32_iterator_t i;
+roaring_init_iterator(r, &i);
+while(i.has_value) {
+ printf("value = %u\n", i.current_value);
+ roaring_advance_uint32_iterator(&i);
+}
+
+Obviously, if you modify the underlying bitmap, the iterator
+becomes invalid. So don't.
+*/
+
+typedef struct roaring_uint32_iterator_s {
+ const roaring_bitmap_t *parent; // owner
+ int32_t container_index; // point to the current container index
+ int32_t in_container_index; // for bitset and array container, this is out
+ // index
+ int32_t run_index; // for run container, this points at the run
+
+ uint32_t current_value;
+ bool has_value;
+
+ const ROARING_CONTAINER_T
+ *container; // should be:
+ // parent->high_low_container.containers[container_index];
+ uint8_t typecode; // should be:
+ // parent->high_low_container.typecodes[container_index];
+ uint32_t highbits; // should be:
+ // parent->high_low_container.keys[container_index]) <<
+ // 16;
+
+} roaring_uint32_iterator_t;
+
+/**
+ * Initialize an iterator object that can be used to iterate through the
+ * values. If there is a value, then this iterator points to the first value
+ * and `it->has_value` is true. The value is in `it->current_value`.
+ */
+void roaring_init_iterator(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit);
+
+/**
+ * Initialize an iterator object that can be used to iterate through the
+ * values. If there is a value, then this iterator points to the last value
+ * and `it->has_value` is true. The value is in `it->current_value`.
+ */
+void roaring_init_iterator_last(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit);
+
+/**
+ * Create an iterator object that can be used to iterate through the values.
+ * Caller is responsible for calling `roaring_free_uint32_iterator()`.
+ *
+ * The iterator is initialized (this function calls `roaring_init_iterator()`)
+ * If there is a value, then this iterator points to the first value and
+ * `it->has_value` is true. The value is in `it->current_value`.
+ */
+roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r);
+
+/**
+* Advance the iterator. If there is a new value, then `it->has_value` is true.
+* The new value is in `it->current_value`. Values are traversed in increasing
+* order. For convenience, returns `it->has_value`.
+*/
+bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/**
+* Decrement the iterator. If there's a new value, then `it->has_value` is true.
+* The new value is in `it->current_value`. Values are traversed in decreasing
+* order. For convenience, returns `it->has_value`.
+*/
+bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/**
+ * Move the iterator to the first value >= `val`. If there is such a value,
+ * then `it->has_value` is true. The new value is in `it->current_value`.
+ * For convenience, returns `it->has_value`.
+ */
+bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
+ uint32_t val);
+
+/**
+ * Creates a copy of an iterator.
+ * Caller must free it.
+ */
+roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+ const roaring_uint32_iterator_t *it);
+
+/**
+ * Free memory following `roaring_create_iterator()`
+ */
+void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
+
+/*
+ * Reads next ${count} values from iterator into user-supplied ${buf}.
+ * Returns the number of read elements.
+ * This number can be smaller than ${count}, which means that iterator is drained.
+ *
+ * This function satisfies semantics of iteration and can be used together with
+ * other iterator functions.
+ * - first value is copied from ${it}->current_value
+ * - after function returns, iterator is positioned at the next element
+ */
+uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
+ uint32_t* buf, uint32_t count);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif /* ROARING_H */
+
+#ifdef __cplusplus
+ /**
+ * Best practices for C++ headers is to avoid polluting global scope.
+ * But for C compatibility when just `roaring.h` is included building as
+ * C++, default to global access for the C public API.
+ *
+ * BUT when `roaring.hh` is included instead, it sets this flag. That way
+ * explicit namespacing must be used to get the C functions.
+ *
+ * This is outside the include guard so that if you include BOTH headers,
+ * the order won't matter; you still get the global definitions.
+ */
+ #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
+ using namespace ::roaring::api;
+ #endif
+#endif
+/* end file include/roaring/roaring.h */
+/* begin file include/roaring/memory.h */
+#ifndef INCLUDE_ROARING_MEMORY_H_
+#define INCLUDE_ROARING_MEMORY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> // for size_t
+
+typedef void* (*roaring_malloc_p)(size_t);
+typedef void* (*roaring_realloc_p)(void*, size_t);
+typedef void* (*roaring_calloc_p)(size_t, size_t);
+typedef void (*roaring_free_p)(void*);
+typedef void* (*roaring_aligned_malloc_p)(size_t, size_t);
+typedef void (*roaring_aligned_free_p)(void*);
+
+typedef struct roaring_memory_s {
+ roaring_malloc_p malloc;
+ roaring_realloc_p realloc;
+ roaring_calloc_p calloc;
+ roaring_free_p free;
+ roaring_aligned_malloc_p aligned_malloc;
+ roaring_aligned_free_p aligned_free;
+} roaring_memory_t;
+
+void roaring_init_memory_hook(roaring_memory_t memory_hook);
+
+void* roaring_malloc(size_t);
+void* roaring_realloc(void*, size_t);
+void* roaring_calloc(size_t, size_t);
+void roaring_free(void*);
+void* roaring_aligned_malloc(size_t, size_t);
+void roaring_aligned_free(void*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INCLUDE_ROARING_MEMORY_H_
+/* end file include/roaring/memory.h */
diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c
index eb7146943..c309c043e 100644
--- a/src/lib/third_party/src/ahocorasick.c
+++ b/src/lib/third_party/src/ahocorasick.c
@@ -43,7 +43,7 @@ typedef __kernel_size_t size_t;
#include "ndpi_api.h"
#include "ahocorasick.h"
-#include "../../ndpi_replace_printf.h"
+#include "../../../include/ndpi_replace_printf.h"
/* TODO: For different depth of node, number of outgoing edges differs
considerably, It is efficient to use different chunk size for
diff --git a/src/lib/third_party/src/roaring.c b/src/lib/third_party/src/roaring.c
index fd382ed00..b75a658b2 100644
--- a/src/lib/third_party/src/roaring.c
+++ b/src/lib/third_party/src/roaring.c
@@ -1,5 +1,15 @@
+#include "ndpi_config.h"
+
+#ifndef USE_ROARING_V2
+#ifndef WIN32
+#include "ndpi_config.h"
+
+#define NDPI_REPLACE_FPRINTF
+#include "../../../include/ndpi_replace_printf.h"
+#endif
+
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
-// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+// Created by amalgamation.sh on 2024-03-20T03:56:45Z
/*
* The CRoaring project is under a dual license (Apache/MIT).
@@ -62,649 +72,49 @@
#endif
#include "roaring.h" /* include public API definitions */
-/* begin file include/roaring/portability.h */
-/*
- * portability.h
- *
- */
-
- /**
- * All macros should be prefixed with either CROARING or ROARING.
- * The library uses both ROARING_...
- * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for
- * macros that are provided by the build system or that are closely
- * related to the format. The header macros may also use ROARING_.
- * The CROARING_ prefix is for internal macros that a user is unlikely
- * to ever interact with.
- */
-
-#ifndef INCLUDE_PORTABILITY_H_
-#define INCLUDE_PORTABILITY_H_
-
-#ifndef __STDC_FORMAT_MACROS
-#define __STDC_FORMAT_MACROS 1
-#endif // __STDC_FORMAT_MACROS
-
-#ifdef _MSC_VER
-#define CROARING_VISUAL_STUDIO 1
-/**
- * We want to differentiate carefully between
- * clang under visual studio and regular visual
- * studio.
- */
-#ifdef __clang__
-// clang under visual studio
-#define CROARING_CLANG_VISUAL_STUDIO 1
-#else
-// just regular visual studio (best guess)
-#define CROARING_REGULAR_VISUAL_STUDIO 1
-#endif // __clang__
-#endif // _MSC_VER
-
-#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
-#undef _POSIX_C_SOURCE
-#endif
-
-#ifndef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 200809L
-#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
-#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
-#define _XOPEN_SOURCE 700
-#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above
-#ifdef __GLIBC__
-#include <malloc.h> // this should never be needed but there are some reports that it is needed.
-#endif
-
-#ifdef __cplusplus
-extern "C" { // portability definitions are in global scope, not a namespace
-#endif
-
-#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT)
-#pragma message( \
- "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.")
-#endif
-
-#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
-#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
-#endif
-
-#if CROARING_REGULAR_VISUAL_STUDIO
-#define __restrict__ __restrict
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-
-
-
-#if defined(__x86_64__) || defined(_M_X64)
-// we have an x64 processor
-#define CROARING_IS_X64 1
-
-#if defined(_MSC_VER) && (_MSC_VER < 1910)
-// Old visual studio systems won't support AVX2 well.
-#undef CROARING_IS_X64
-#endif
-
-#if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__)
-// Older versions of clang have a bug affecting us
-// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
-#undef CROARING_IS_X64
-#endif
-
-#ifdef CROARING_DISABLE_X64
-#undef CROARING_IS_X64
-#endif
-
-
-#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
- /* RH 7 don't have atomic includes */
-#undef CROARING_IS_X64
-#undef ALLOW_UNALIGNED
-#define ALLOW_UNALIGNED
-#endif
-
-
-
-// we include the intrinsic header
-#if !CROARING_REGULAR_VISUAL_STUDIO
-/* Non-Microsoft C/C++-compatible compiler */
-#include <x86intrin.h> // on some recent GCC, this will declare posix_memalign
-
-
-
-#ifdef CROARING_CLANG_VISUAL_STUDIO
-
-/**
- * You are not supposed, normally, to include these
- * headers directly. Instead you should either include intrin.h
- * or x86intrin.h. However, when compiling with clang
- * under Windows (i.e., when _MSC_VER is set), these headers
- * only get included *if* the corresponding features are detected
- * from macros:
- * e.g., if __AVX2__ is set... in turn, we normally set these
- * macros by compiling against the corresponding architecture
- * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
- * software with these advanced instructions. These headers would
- * normally guard against such usage, but we carefully included
- * <x86intrin.h> (or <intrin.h>) before, so the headers
- * are fooled.
- */
-#include <bmiintrin.h> // for _blsr_u64
-#include <lzcntintrin.h> // for __lzcnt64
-#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
-#include <smmintrin.h>
-#include <tmmintrin.h>
-#include <avxintrin.h>
-#include <avx2intrin.h>
-#include <wmmintrin.h>
-// unfortunately, we may not get _blsr_u64, but, thankfully, clang
-// has it as a macro.
-#ifndef _blsr_u64
-// we roll our own
-#define _blsr_u64(n) ((n - 1) & n)
-#endif // _blsr_u64
-#endif // SIMDJSON_CLANG_VISUAL_STUDIO
-
-
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-#endif // defined(__x86_64__) || defined(_M_X64)
-
-#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
-# define USENEON
-#endif
-#if defined(USENEON)
-# include <arm_neon.h>
-#endif
-
-#if !CROARING_REGULAR_VISUAL_STUDIO
-/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
- * assembly */
-#define CROARING_INLINE_ASM 1
-#endif // _MSC_VER
-
-#if CROARING_REGULAR_VISUAL_STUDIO
-/* Microsoft C/C++-compatible compiler */
-#include <intrin.h>
-
-#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
- // intrinsics are defined!!!
-// sadly there is no way to check whether we are missing these intrinsics
-// specifically.
-
-/* wrappers for Visual Studio built-ins that look like gcc built-ins */
-/* result might be undefined when input_num is zero */
-inline int __builtin_ctzll(unsigned long long input_num) {
- unsigned long index;
-#ifdef _WIN64 // highly recommended!!!
- _BitScanForward64(&index, input_num);
-#else // if we must support 32-bit Windows
- if ((uint32_t)input_num != 0) {
- _BitScanForward(&index, (uint32_t)input_num);
- } else {
- _BitScanForward(&index, (uint32_t)(input_num >> 32));
- index += 32;
- }
-#endif
- return index;
-}
-
-/* result might be undefined when input_num is zero */
-inline int __builtin_clzll(unsigned long long input_num) {
- unsigned long index;
-#ifdef _WIN64 // highly recommended!!!
- _BitScanReverse64(&index, input_num);
-#else // if we must support 32-bit Windows
- if (input_num > 0xFFFFFFFF) {
- _BitScanReverse(&index, (uint32_t)(input_num >> 32));
- index += 32;
- } else {
- _BitScanReverse(&index, (uint32_t)(input_num));
- }
-#endif
- return 63 - index;
-}
-
-
-/* software implementation avoids POPCNT */
-/*static inline int __builtin_popcountll(unsigned long long input_num) {
- const uint64_t m1 = 0x5555555555555555; //binary: 0101...
- const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
- const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ...
- const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
-
- input_num -= (input_num >> 1) & m1;
- input_num = (input_num & m2) + ((input_num >> 2) & m2);
- input_num = (input_num + (input_num >> 4)) & m4;
- return (input_num * h01) >> 56;
-}*/
-
-/* Use #define so this is effective even under /Ob0 (no inline) */
-#define __builtin_unreachable() __assume(0)
-#endif
-
-#endif
-
-#if CROARING_REGULAR_VISUAL_STUDIO
-#define ALIGNED(x) __declspec(align(x))
-#elif defined(__GNUC__) || defined(__clang__)
-#define ALIGNED(x) __attribute__((aligned(x)))
-#else
-#warning "Warning. Unrecognized compiler."
-#define ALIGNED(x)
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-#define WARN_UNUSED __attribute__((warn_unused_result))
-#else
-#define WARN_UNUSED
-#endif
-
-#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
-
-#ifdef USENEON
-// we can always compute the popcount fast.
-#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
-// we will need this function:
-static inline int hammingbackup(uint64_t x) {
- uint64_t c1 = UINT64_C(0x5555555555555555);
- uint64_t c2 = UINT64_C(0x3333333333333333);
- uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F);
- x -= (x >> 1) & c1;
- x = (( x >> 2) & c2) + (x & c2); x=(x +(x>>4))&c4;
- x *= UINT64_C(0x0101010101010101);
- return x >> 56;
-}
-#endif
-
-
-static inline int hamming(uint64_t x) {
-#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
-#ifdef USENEON
- return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
-#elif defined(_M_ARM64)
- return hammingbackup(x);
- // (int) _CountOneBits64(x); is unavailable
-#else // _M_ARM64
- return (int) __popcnt64(x);
-#endif // _M_ARM64
-#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
-#ifdef _M_ARM
- return hammingbackup(x);
- // _CountOneBits is unavailable
-#else // _M_ARM
- return (int) __popcnt(( unsigned int)x) + (int) __popcnt(( unsigned int)(x>>32));
-#endif // _M_ARM
-#else
- return __builtin_popcountll(x);
-#endif
-}
-
-#ifndef UINT64_C
-#define UINT64_C(c) (c##ULL)
-#endif // UINT64_C
-
-#ifndef UINT32_C
-#define UINT32_C(c) (c##UL)
-#endif // UINT32_C
-
-#ifdef __cplusplus
-} // extern "C" {
-#endif // __cplusplus
-
-
-// this is almost standard?
-#undef STRINGIFY_IMPLEMENTATION_
-#undef STRINGIFY
-#define STRINGIFY_IMPLEMENTATION_(a) #a
-#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)
-
-// Our fast kernels require 64-bit systems.
-//
-// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
-// Furthermore, the number of SIMD registers is reduced.
-//
-// On 32-bit ARM, we would have smaller registers.
-//
-// The library should still have the fallback kernel. It is
-// slower, but it should run everywhere.
-
-//
-// Enable valid runtime implementations, and select CROARING_BUILTIN_IMPLEMENTATION
-//
-
-// We are going to use runtime dispatch.
-#ifdef CROARING_IS_X64
-#ifdef __clang__
-// clang does not have GCC push pop
-// warning: clang attribute push can't be used within a namespace in clang up
-// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be *outside* of a
-// namespace.
-#define CROARING_TARGET_REGION(T) \
- _Pragma(STRINGIFY( \
- clang attribute push(__attribute__((target(T))), apply_to = function)))
-#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop")
-#elif defined(__GNUC__)
-// GCC is easier
-#define CROARING_TARGET_REGION(T) \
- _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
-#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options")
-#endif // clang then gcc
-
-#endif // CROARING_IS_X64
-
-// Default target region macros don't do anything.
-#ifndef CROARING_TARGET_REGION
-#define CROARING_TARGET_REGION(T)
-#define CROARING_UNTARGET_REGION
-#endif
-
-#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
-
-#ifdef __AVX2__
-// No need for runtime dispatching.
-// It is unnecessary and harmful to old clang to tag regions.
-#undef CROARING_TARGET_AVX2
-#define CROARING_TARGET_AVX2
-#undef CROARING_UNTARGET_REGION
-#define CROARING_UNTARGET_REGION
-#endif
-
-#ifndef ALLOW_UNALIGNED
-// Allow unaligned memory access
-#if defined(__GNUC__) || defined(__clang__)
-#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
-#else
-#define ALLOW_UNALIGNED
-#endif
-#endif
-
-#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
- #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
- #elif defined(_WIN32)
- #define CROARING_IS_BIG_ENDIAN 0
- #else
- #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
- #include <machine/endian.h>
- #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
- #include <sys/byteorder.h>
- #else // defined(__APPLE__) || defined(__FreeBSD__)
-
- #ifdef __has_include
- #if __has_include(<endian.h>)
- #include <endian.h>
- #endif //__has_include(<endian.h>)
- #endif //__has_include
-
- #endif // defined(__APPLE__) || defined(__FreeBSD__)
-
-
- #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
- #define CROARING_IS_BIG_ENDIAN 0
- #endif
-
- #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define CROARING_IS_BIG_ENDIAN 0
- #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define CROARING_IS_BIG_ENDIAN 1
- #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#endif
-
-// We need portability.h to be included first,
-// but we also always want isadetection.h to be
-// included (right after).
-// See https://github.com/RoaringBitmap/CRoaring/issues/394
-// There is no scenario where we want portability.h to
-// be included, but not isadetection.h: the latter is a
-// strict requirement.
-#endif /* INCLUDE_PORTABILITY_H_ */
-/* end file include/roaring/portability.h */
/* begin file include/roaring/isadetection.h */
-/* From
-https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
-Highly modified.
-
-Copyright (c) 2016- Facebook, Inc (Adam Paszke)
-Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
-Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
-Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
-Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
-Copyright (c) 2011-2013 NYU (Clement Farabet)
-Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
-Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
-(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
-Samy Bengio, Johnny Mariethoz)
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
-America and IDIAP Research Institute nor the names of its contributors may be
- used to endorse or promote products derived from this software without
- specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-*/
-
#ifndef ROARING_ISADETECTION_H
#define ROARING_ISADETECTION_H
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#ifdef __has_include
+// We want to make sure that the AVX-512 functions are only built on compilers
+// fully supporting AVX-512.
+#if __has_include(<avx512vbmi2intrin.h>)
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif // #if __has_include(<avx512vbmi2intrin.h>)
+#endif // #ifdef __has_include
+
+// Visual Studio 2019 and up support AVX-512
+#ifdef _MSC_VER
+#if _MSC_VER >= 1920
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif // #if _MSC_VER >= 1920
+#endif // #ifdef _MSC_VER
-// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H).
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
-// We need portability.h to be included first, see
-// https://github.com/RoaringBitmap/CRoaring/issues/394
-#if CROARING_REGULAR_VISUAL_STUDIO
-#include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
-#include <cpuid.h>
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-
-
-enum croaring_instruction_set {
- CROARING_DEFAULT = 0x0,
- CROARING_NEON = 0x1,
- CROARING_AVX2 = 0x4,
- CROARING_SSE42 = 0x8,
- CROARING_PCLMULQDQ = 0x10,
- CROARING_BMI1 = 0x20,
- CROARING_BMI2 = 0x40,
- CROARING_ALTIVEC = 0x80,
- CROARING_UNINITIALIZED = 0x8000
-};
-
-#if defined(__PPC64__)
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-// return CROARING_ALTIVEC;
-//}
-
-#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
-
-#if defined(__ARM_NEON)
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-// return CROARING_NEON;
-//}
-
-#else // ARM without NEON
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-// return CROARING_DEFAULT;
-//}
-
-#endif
-
-#elif defined(__x86_64__) || defined(_M_AMD64) // x64
-
-
-
-
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
- uint32_t *edx) {
-
-#if CROARING_REGULAR_VISUAL_STUDIO
- int cpu_info[4];
- __cpuid(cpu_info, *eax);
- *eax = cpu_info[0];
- *ebx = cpu_info[1];
- *ecx = cpu_info[2];
- *edx = cpu_info[3];
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
- uint32_t level = *eax;
- __get_cpuid(level, eax, ebx, ecx, edx);
-#else
- uint32_t a = *eax, b, c = *ecx, d;
- __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
- *eax = a;
- *ebx = b;
- *ecx = c;
- *edx = d;
-#endif
-}
-
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
- uint32_t eax, ebx, ecx, edx;
- uint32_t host_isa = 0x0;
- // Can be found on Intel ISA Reference for CPUID
- static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7
- static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7
- static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7
- static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1
- static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
- // ECX for EAX=0x7
- eax = 0x7;
- ecx = 0x0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if (ebx & cpuid_avx2_bit) {
- host_isa |= CROARING_AVX2;
- }
- if (ebx & cpuid_bmi1_bit) {
- host_isa |= CROARING_BMI1;
- }
-
- if (ebx & cpuid_bmi2_bit) {
- host_isa |= CROARING_BMI2;
- }
-
- // EBX for EAX=0x1
- eax = 0x1;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- if (ecx & cpuid_sse42_bit) {
- host_isa |= CROARING_SSE42;
- }
-
- if (ecx & cpuid_pclmulqdq_bit) {
- host_isa |= CROARING_PCLMULQDQ;
- }
-
- return host_isa;
-}
-#else // fallback
-
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-// return CROARING_DEFAULT;
-//}
-
-
-#endif // end SIMD extension detection code
-
-
-#if defined(__x86_64__) || defined(_M_AMD64) // x64
-
-#if defined(__cplusplus)
-static inline uint32_t croaring_detect_supported_architectures() {
- // thread-safe as per the C++11 standard.
- static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
- return buffer;
-}
-#elif CROARING_VISUAL_STUDIO
-// Visual Studio does not support C11 atomics.
-static inline uint32_t croaring_detect_supported_architectures() {
- static int buffer = CROARING_UNINITIALIZED;
- if (buffer == CROARING_UNINITIALIZED) {
- buffer = dynamic_croaring_detect_supported_architectures();
- }
- return buffer;
-}
-#else // CROARING_VISUAL_STUDIO
-
-
-#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
-#define ROARING_DISABLE_AVX
-#undef __AVX2__
-/* CentOS 7 */
-static inline uint32_t croaring_detect_supported_architectures() {
- return(dynamic_croaring_detect_supported_architectures());
-}
-#else
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#define CROARING_COMPILER_SUPPORTS_AVX512 0
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512
-#include <stdatomic.h>
-static inline uint32_t croaring_detect_supported_architectures() {
- // we use an atomic for thread safety
- static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
- if (buffer == CROARING_UNINITIALIZED) {
- // atomicity is sufficient
- buffer = dynamic_croaring_detect_supported_architectures();
- }
- return buffer;
-}
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-
-#ifdef ROARING_DISABLE_AVX
-static inline bool croaring_avx2() {
- return false;
-}
-#elif defined(__AVX2__)
-static inline bool croaring_avx2() {
- return true;
+enum {
+ ROARING_SUPPORTS_AVX2 = 1,
+ ROARING_SUPPORTS_AVX512 = 2,
+};
+int croaring_hardware_support(void);
+#ifdef __cplusplus
}
-#else
-static inline bool croaring_avx2() {
- return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
}
+} // extern "C" { namespace roaring { namespace internal {
#endif
-
-
-#else // defined(__x86_64__) || defined(_M_AMD64) // x64
-
-//static inline bool croaring_avx2() {
-// return false;
-//}
-
-//static inline uint32_t croaring_detect_supported_architectures() {
-// // no runtime dispatch
-// return dynamic_croaring_detect_supported_architectures();
-//}
-#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
-
-#endif // ROARING_ISADETECTION_H
+#endif // x64
+#endif // ROARING_ISADETECTION_H
/* end file include/roaring/isadetection.h */
/* begin file include/roaring/containers/perfparameters.h */
#ifndef PERFPARAMETERS_H_
@@ -713,7 +123,9 @@ static inline bool croaring_avx2() {
#include <stdbool.h>
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/**
@@ -748,7 +160,9 @@ enum { ARRAY_DEFAULT_INIT_SIZE = 0 };
#endif
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif
@@ -766,16 +180,16 @@ enum { ARRAY_DEFAULT_INIT_SIZE = 0 };
#define INCLUDE_CONTAINERS_CONTAINER_DEFS_H_
#ifdef __cplusplus
- #include <type_traits> // used by casting helper for compile-time check
+#include <type_traits> // used by casting helper for compile-time check
#endif
// The preferences are a separate file to separate out tweakable parameters
#ifdef __cplusplus
-namespace roaring { namespace internal { // No extern "C" (contains template)
+namespace roaring {
+namespace internal { // No extern "C" (contains template)
#endif
-
/*
* Since roaring_array_t's definition is not opaque, the container type is
* part of the API. If it's not going to be `void*` then it needs a name, and
@@ -789,7 +203,6 @@ namespace roaring { namespace internal { // No extern "C" (contains template)
typedef ROARING_CONTAINER_T container_t;
#undef ROARING_CONTAINER_T
-
/*
* See ROARING_CONTAINER_T for notes on using container_t as a base class.
* This macro helps make the following pattern look nicer:
@@ -805,14 +218,11 @@ typedef ROARING_CONTAINER_T container_t;
* }
*/
#if defined(__cplusplus)
- #define STRUCT_CONTAINER(name) \
- struct name : public container_t /* { ... } */
+#define STRUCT_CONTAINER(name) struct name : public container_t /* { ... } */
#else
- #define STRUCT_CONTAINER(name) \
- struct name /* { ... } */
+#define STRUCT_CONTAINER(name) struct name /* { ... } */
#endif
-
/**
* Since container_t* is not void* in C++, "dangerous" casts are not needed to
* downcast; only a static_cast<> is needed. Define a macro for static casting
@@ -831,34 +241,32 @@ typedef ROARING_CONTAINER_T container_t;
* leveraging <type_traits> to make sure it's legal in the C++ build.
*/
#ifdef __cplusplus
- #define CAST(type,value) static_cast<type>(value)
- #define movable_CAST(type,value) movable_CAST_HELPER<type>(value)
-
- template<typename PPDerived, typename Base>
- PPDerived movable_CAST_HELPER(Base **ptr_to_ptr) {
- typedef typename std::remove_pointer<PPDerived>::type PDerived;
- typedef typename std::remove_pointer<PDerived>::type Derived;
- static_assert(
- std::is_base_of<Base, Derived>::value,
- "use movable_CAST() for container_t** => xxx_container_t**"
- );
- return reinterpret_cast<Derived**>(ptr_to_ptr);
- }
+#define CAST(type, value) static_cast<type>(value)
+#define movable_CAST(type, value) movable_CAST_HELPER<type>(value)
+
+template <typename PPDerived, typename Base>
+PPDerived movable_CAST_HELPER(Base **ptr_to_ptr) {
+ typedef typename std::remove_pointer<PPDerived>::type PDerived;
+ typedef typename std::remove_pointer<PDerived>::type Derived;
+ static_assert(std::is_base_of<Base, Derived>::value,
+ "use movable_CAST() for container_t** => xxx_container_t**");
+ return reinterpret_cast<Derived **>(ptr_to_ptr);
+}
#else
- #define CAST(type,value) ((type)value)
- #define movable_CAST(type, value) ((type)value)
+#define CAST(type, value) ((type)value)
+#define movable_CAST(type, value) ((type)value)
#endif
// Use for converting e.g. an `array_container_t**` to a `container_t**`
//
-#define movable_CAST_base(c) movable_CAST(container_t **, c)
-
+#define movable_CAST_base(c) movable_CAST(container_t **, c)
#ifdef __cplusplus
-} } // namespace roaring { namespace internal {
+}
+} // namespace roaring { namespace internal {
#endif
-#endif /* INCLUDE_CONTAINERS_CONTAINER_DEFS_H_ */
+#endif /* INCLUDE_CONTAINERS_CONTAINER_DEFS_H_ */
/* end file include/roaring/containers/container_defs.h */
/* begin file include/roaring/array_util.h */
#ifndef ARRAY_UTIL_H
@@ -866,10 +274,22 @@ typedef ROARING_CONTAINER_T container_t;
#include <stddef.h> // for size_t
#include <stdint.h>
+#include <string.h>
-
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/*
@@ -877,8 +297,8 @@ extern "C" { namespace roaring { namespace internal {
* Assumes that array is sorted, has logarithmic complexity.
* if the result is x, then:
* if ( x>0 ) you have array[x] = ikey
- * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey)
- * keys the array sorted.
+ * if ( x<0 ) then inserting ikey at position -x-1 in array (ensuring that
+ * array[-x-1]=ikey) keeps the array sorted.
*/
inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
uint16_t ikey) {
@@ -901,9 +321,9 @@ inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
/**
* Galloping search
* Assumes that array is sorted, has logarithmic complexity.
- * if the result is x, then if x = length, you have that all values in array between pos and length
- * are smaller than min.
- * otherwise returns the first index x such that array[x] >= min.
+ * if the result is x, then if x = length, you have that all values in array
+ * between pos and length are smaller than min. Otherwise returns the first
+ * index x such that array[x] >= min.
*/
static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
int32_t length, uint16_t min) {
@@ -951,18 +371,18 @@ static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
}
/**
- * Returns number of elements which are less then $ikey.
+ * Returns number of elements which are less than ikey.
* Array elements must be unique and sorted.
*/
static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
uint16_t ikey) {
if (lenarray == 0) return 0;
int32_t pos = binarySearch(array, lenarray, ikey);
- return pos >= 0 ? pos : -(pos+1);
+ return pos >= 0 ? pos : -(pos + 1);
}
/**
- * Returns number of elements which are greater then $ikey.
+ * Returns number of elements which are greater than ikey.
* Array elements must be unique and sorted.
*/
static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
@@ -970,9 +390,9 @@ static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
if (lenarray == 0) return 0;
int32_t pos = binarySearch(array, lenarray, ikey);
if (pos >= 0) {
- return lenarray - (pos+1);
+ return lenarray - (pos + 1);
} else {
- return lenarray - (-pos-1);
+ return lenarray - (-pos - 1);
}
}
@@ -987,6 +407,19 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
const uint16_t *__restrict__ B, size_t s_b,
uint16_t *C);
+int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b);
+
+/**
+ * Take an array container and write it out to a 32-bit array, using base
+ * as the offset.
+ */
+int array_container_to_uint32_array_vector16(void *vout, const uint16_t *array,
+ size_t cardinality, uint32_t base);
+#if CROARING_COMPILER_SUPPORTS_AVX512
+int avx512_array_container_to_uint32_array(void *vout, const uint16_t *array,
+ size_t cardinality, uint32_t base);
+#endif
/**
* Compute the cardinality of the intersection using SSE4 instructions
*/
@@ -1008,10 +441,11 @@ int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
const uint16_t *largearray,
size_t size_l);
-
-/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */
+/* Check whether the size of the intersection between one small and one large
+ * set of uint16_t is non-zero. */
bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
- const uint16_t *largearray, size_t size_l);
+ const uint16_t *largearray,
+ size_t size_l);
/**
* Generic intersection function.
*/
@@ -1027,7 +461,7 @@ int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
* Checking whether the size of the intersection is non-zero.
*/
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
- const uint16_t *B, const size_t lenB);
+ const uint16_t *B, const size_t lenB);
/**
* Generic union function.
*/
@@ -1091,18 +525,22 @@ size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
const uint32_t *set_2, size_t size_2);
/**
-* combines union_uint16 and union_vector16 optimally
-*/
-size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
- size_t size_2, uint16_t *buffer);
-
+ * combines union_uint16 and union_vector16 optimally
+ */
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1,
+ const uint16_t *set_2, size_t size_2,
+ uint16_t *buffer);
bool memequals(const void *s1, const void *s2, size_t n);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
#endif
-
#endif
/* end file include/roaring/array_util.h */
/* begin file include/roaring/utilasm.h */
@@ -1116,7 +554,8 @@ bool memequals(const void *s1, const void *s2, size_t n);
#ifdef __cplusplus
-extern "C" { namespace roaring {
+extern "C" {
+namespace roaring {
#endif
#if defined(CROARING_INLINE_ASM)
@@ -1128,14 +567,14 @@ extern "C" { namespace roaring {
: /* write */ \
"r"(bitsReg), /* read only */ \
"r"(srcReg) /* read only */ \
- )
+ )
#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \
__asm volatile("shrx %1, %0, %0" \
: "+r"(srcReg) \
: /* read/write */ \
"r"(bitsReg) /* read only */ \
- )
+ )
#define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) \
__asm volatile("shlx %1, %2, %0" \
@@ -1143,7 +582,7 @@ extern "C" { namespace roaring {
: /* write */ \
"r"(bitsReg), /* read only */ \
"r"(srcReg) /* read only */ \
- )
+ )
// set bit at position testBit within testByte to 1 and
// copy cmovDst to cmovSrc if that bit was previously clear
#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \
@@ -1154,7 +593,7 @@ extern "C" { namespace roaring {
"+r"(count) \
: /* read/write */ \
"r"(testBit) /* read only */ \
- )
+ )
#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \
__asm volatile( \
@@ -1164,7 +603,7 @@ extern "C" { namespace roaring {
"+r"(count) \
: /* read/write */ \
"r"(testBit) /* read only */ \
- )
+ )
#define ASM_BT64(testByte, testBit, count) \
__asm volatile( \
@@ -1174,15 +613,16 @@ extern "C" { namespace roaring {
: /* write */ \
"r"(testByte), /* read only */ \
"r"(testBit) /* read only */ \
- )
+ )
#endif
#ifdef __cplusplus
-} } // extern "C" { namespace roaring {
+}
+} // extern "C" { namespace roaring {
#endif
-#endif /* INCLUDE_UTILASM_H_ */
+#endif /* INCLUDE_UTILASM_H_ */
/* end file include/roaring/utilasm.h */
/* begin file include/roaring/bitset_util.h */
#ifndef BITSET_UTIL_H
@@ -1191,8 +631,20 @@ extern "C" { namespace roaring {
#include <stdint.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/*
@@ -1200,13 +652,12 @@ extern "C" { namespace roaring { namespace internal {
*/
static inline void bitset_set_range(uint64_t *words, uint32_t start,
uint32_t end) {
-
if (start == end) return;
uint32_t firstword = start / 64;
uint32_t endword = (end - 1) / 64, i;
if (firstword == endword) {
words[firstword] |= ((~UINT64_C(0)) << (start % 64)) &
- ((~UINT64_C(0)) >> ((~end + 1) % 64));
+ ((~UINT64_C(0)) >> ((~end + 1) % 64));
return;
}
words[firstword] |= (~UINT64_C(0)) << (start % 64);
@@ -1216,7 +667,6 @@ static inline void bitset_set_range(uint64_t *words, uint32_t start,
words[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64);
}
-
/*
* Find the cardinality of the bitset in [begin,begin+lenminusone]
*/
@@ -1226,17 +676,18 @@ static inline int bitset_lenrange_cardinality(const uint64_t *words,
uint32_t firstword = start / 64;
uint32_t endword = (start + lenminusone) / 64, i;
if (firstword == endword) {
- return hamming(words[firstword] &
- ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
- << (start % 64));
+ return roaring_hamming(words[firstword] &
+ ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
+ << (start % 64));
}
- int answer = hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
- for (i = firstword + 1; i < endword; i++) {
- answer += hamming(words[i]);
+ int answer =
+ roaring_hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
+ for ( i = firstword + 1; i < endword; i++) {
+ answer += roaring_hamming(words[i]);
}
- answer +=
- hamming(words[endword] &
- (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64));
+ answer += roaring_hamming(words[endword] &
+ (~UINT64_C(0)) >>
+ (((~start + 1) - lenminusone - 1) % 64));
return answer;
}
@@ -1249,23 +700,23 @@ static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start,
uint32_t endword = (start + lenminusone) / 64, i;
if (firstword == endword) {
return (words[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
- << (start % 64)) == 0;
+ << (start % 64)) == 0;
}
- if (((words[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) {
+ if (((words[firstword] & ((~UINT64_C(0)) << (start % 64)))) != 0) {
return false;
}
- for ( i = firstword + 1; i < endword; i++) {
+ for (i = firstword + 1; i < endword; i++) {
if (words[i] != 0) {
return false;
}
}
- if ((words[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) {
+ if ((words[endword] &
+ (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) {
return false;
}
return true;
}
-
/*
* Set all bits in indexes [begin,begin+lenminusone] to true.
*/
@@ -1275,12 +726,12 @@ static inline void bitset_set_lenrange(uint64_t *words, uint32_t start,
uint32_t endword = (start + lenminusone) / 64, i;
if (firstword == endword) {
words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
- << (start % 64);
+ << (start % 64);
return;
}
uint64_t temp = words[endword];
words[firstword] |= (~UINT64_C(0)) << (start % 64);
- for ( i = firstword + 1; i < endword; i += 2)
+ for (i = firstword + 1; i < endword; i += 2)
words[i] = words[i + 1] = ~UINT64_C(0);
words[endword] =
temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
@@ -1295,7 +746,7 @@ static inline void bitset_flip_range(uint64_t *words, uint32_t start,
uint32_t firstword = start / 64;
uint32_t endword = (end - 1) / 64, i;
words[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
- for ( i = firstword; i < endword; i++) {
+ for (i = firstword; i < endword; i++) {
words[i] = ~words[i];
}
words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
@@ -1311,11 +762,11 @@ static inline void bitset_reset_range(uint64_t *words, uint32_t start,
uint32_t endword = (end - 1) / 64, i;
if (firstword == endword) {
words[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) &
- ((~UINT64_C(0)) >> ((~end + 1) % 64)));
+ ((~UINT64_C(0)) >> ((~end + 1) % 64)));
return;
}
words[firstword] &= ~((~UINT64_C(0)) << (start % 64));
- for ( i = firstword + 1; i < endword; i++) {
+ for (i = firstword + 1; i < endword; i++) {
words[i] = UINT64_C(0);
}
words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64));
@@ -1340,6 +791,9 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
uint32_t *out, size_t outcapacity,
uint32_t base);
+size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length,
+ uint32_t *out, size_t outcapacity,
+ uint32_t base);
/*
* Given a bitset containing "length" 64-bit words, write out the position
* of all the set bits to "out", values start at "base".
@@ -1372,6 +826,10 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
uint16_t *out, size_t outcapacity,
uint16_t base);
+size_t bitset_extract_setbits_avx512_uint16(const uint64_t *words,
+ size_t length, uint16_t *out,
+ size_t outcapacity, uint16_t base);
+
/*
* Given a bitset containing "length" 64-bit words, write out the position
* of all the set bits to "out", values start at "base"
@@ -1395,10 +853,9 @@ size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
*
* Returns how many values were actually decoded.
*/
-size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
- const uint64_t * __restrict__ words2,
- size_t length, uint16_t *out,
- uint16_t base);
+size_t bitset_extract_intersection_setbits_uint16(
+ const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2,
+ size_t length, uint16_t *out, uint16_t base);
/*
* Given a bitset having cardinality card, set all bit values in the list (there
@@ -1435,7 +892,7 @@ uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
/***
* BEGIN Harley-Seal popcount functions.
*/
@@ -1473,7 +930,7 @@ static inline __m256i popcount256(__m256i v) {
const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi);
return _mm256_sad_epu8(popcnt1, popcnt2);
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
/**
@@ -1485,7 +942,7 @@ static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
*h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c));
*l = _mm256_xor_si256(u, c);
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
/**
@@ -1549,7 +1006,7 @@ inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
(uint64_t)(_mm256_extract_epi64(total, 2)) +
(uint64_t)(_mm256_extract_epi64(total, 3));
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#define AVXPOPCNTFNC(opname, avx_intrinsic) \
static inline uint64_t avx2_harley_seal_popcount256_##opname( \
@@ -1733,28 +1190,142 @@ CROARING_UNTARGET_REGION
CROARING_TARGET_AVX2
AVXPOPCNTFNC(or, _mm256_or_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
AVXPOPCNTFNC(union, _mm256_or_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
AVXPOPCNTFNC(and, _mm256_and_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
AVXPOPCNTFNC(intersection, _mm256_and_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVXPOPCNTFNC (xor, _mm256_xor_si256)
-CROARING_UNTARGET_REGION
+AVXPOPCNTFNC(xor, _mm256_xor_si256)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
AVXPOPCNTFNC(andnot, _mm256_andnot_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
+#define VPOPCNT_AND_ADD(ptr, i, accu) \
+ const __m512i v##i = _mm512_loadu_si512((const __m512i *)ptr + i); \
+ const __m512i p##i = _mm512_popcnt_epi64(v##i); \
+ accu = _mm512_add_epi64(accu, p##i);
+
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+static inline uint64_t sum_epu64_256(const __m256i v) {
+ return (uint64_t)(_mm256_extract_epi64(v, 0)) +
+ (uint64_t)(_mm256_extract_epi64(v, 1)) +
+ (uint64_t)(_mm256_extract_epi64(v, 2)) +
+ (uint64_t)(_mm256_extract_epi64(v, 3));
+}
+
+static inline uint64_t simd_sum_epu64(const __m512i v) {
+ __m256i lo = _mm512_extracti64x4_epi64(v, 0);
+ __m256i hi = _mm512_extracti64x4_epi64(v, 1);
+
+ return sum_epu64_256(lo) + sum_epu64_256(hi);
+}
+
+static inline uint64_t avx512_vpopcount(const __m512i *data,
+ const uint64_t size) {
+ const uint64_t limit = size - size % 4;
+ __m512i total = _mm512_setzero_si512();
+ uint64_t i = 0;
+
+ for (; i < limit; i += 4) {
+ VPOPCNT_AND_ADD(data + i, 0, total);
+ VPOPCNT_AND_ADD(data + i, 1, total);
+ VPOPCNT_AND_ADD(data + i, 2, total);
+ VPOPCNT_AND_ADD(data + i, 3, total);
+ }
+
+ for (; i < size; i++) {
+ total = _mm512_add_epi64(
+ total, _mm512_popcnt_epi64(_mm512_loadu_si512(data + i)));
+ }
+
+ return simd_sum_epu64(total);
+}
+CROARING_UNTARGET_AVX512
+#endif
+
+#define AVXPOPCNTFNC512(opname, avx_intrinsic) \
+ static inline uint64_t avx512_harley_seal_popcount512_##opname( \
+ const __m512i *data1, const __m512i *data2, const uint64_t size) { \
+ __m512i total = _mm512_setzero_si512(); \
+ const uint64_t limit = size - size % 4; \
+ uint64_t i = 0; \
+ for (; i < limit; i += 4) { \
+ __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i), \
+ _mm512_loadu_si512(data2 + i)); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1)); \
+ __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1), \
+ _mm512_loadu_si512(data2 + i + 1)); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2)); \
+ __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2), \
+ _mm512_loadu_si512(data2 + i + 2)); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3)); \
+ __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3), \
+ _mm512_loadu_si512(data2 + i + 3)); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4)); \
+ } \
+ for (; i < size; i++) { \
+ __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i), \
+ _mm512_loadu_si512(data2 + i)); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a)); \
+ } \
+ return simd_sum_epu64(total); \
+ } \
+ static inline uint64_t avx512_harley_seal_popcount512andstore_##opname( \
+ const __m512i *__restrict__ data1, const __m512i *__restrict__ data2, \
+ __m512i *__restrict__ out, const uint64_t size) { \
+ __m512i total = _mm512_setzero_si512(); \
+ const uint64_t limit = size - size % 4; \
+ uint64_t i = 0; \
+ for (; i < limit; i += 4) { \
+ __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i), \
+ _mm512_loadu_si512(data2 + i)); \
+ _mm512_storeu_si512(out + i, a1); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1)); \
+ __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1), \
+ _mm512_loadu_si512(data2 + i + 1)); \
+ _mm512_storeu_si512(out + i + 1, a2); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2)); \
+ __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2), \
+ _mm512_loadu_si512(data2 + i + 2)); \
+ _mm512_storeu_si512(out + i + 2, a3); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3)); \
+ __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3), \
+ _mm512_loadu_si512(data2 + i + 3)); \
+ _mm512_storeu_si512(out + i + 3, a4); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4)); \
+ } \
+ for (; i < size; i++) { \
+ __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i), \
+ _mm512_loadu_si512(data2 + i)); \
+ _mm512_storeu_si512(out + i, a); \
+ total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a)); \
+ } \
+ return simd_sum_epu64(total); \
+ }
+
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+AVXPOPCNTFNC512(or, _mm512_or_si512)
+AVXPOPCNTFNC512(union, _mm512_or_si512)
+AVXPOPCNTFNC512(and, _mm512_and_si512)
+AVXPOPCNTFNC512(intersection, _mm512_and_si512)
+AVXPOPCNTFNC512(xor, _mm512_xor_si512)
+AVXPOPCNTFNC512(andnot, _mm512_andnot_si512)
+CROARING_UNTARGET_AVX512
+#endif
/***
* END Harley-Seal popcount functions.
*/
@@ -1762,9 +1333,13 @@ CROARING_UNTARGET_REGION
#endif // CROARING_IS_X64
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal
+}
+}
+} // extern "C" { namespace roaring { namespace internal
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
#endif
-
#endif
/* end file include/roaring/bitset_util.h */
/* begin file include/roaring/containers/array.h */
@@ -1779,9 +1354,11 @@ CROARING_UNTARGET_REGION
#include <string.h>
+// Include other headers after roaring_types.h
#ifdef __cplusplus
-extern "C" { namespace roaring {
+extern "C" {
+namespace roaring {
// Note: in pure C++ code, you should avoid putting `using` in header files
using api::roaring_iterator;
@@ -1807,8 +1384,8 @@ STRUCT_CONTAINER(array_container_s) {
typedef struct array_container_s array_container_t;
-#define CAST_array(c) CAST(array_container_t *, c) // safer downcast
-#define const_CAST_array(c) CAST(const array_container_t *, c)
+#define CAST_array(c) CAST(array_container_t *, c) // safer downcast
+#define const_CAST_array(c) CAST(const array_container_t *, c)
#define movable_CAST_array(c) movable_CAST(array_container_t **, c)
/* Create a new array with default. Return NULL in case of failure. See also
@@ -1820,7 +1397,7 @@ array_container_t *array_container_create(void);
array_container_t *array_container_create_given_capacity(int32_t size);
/* Create a new array containing all values in [min,max). */
-array_container_t * array_container_create_range(uint32_t min, uint32_t max);
+array_container_t *array_container_create_range(uint32_t min, uint32_t max);
/*
* Shrink the capacity to the actual size, return the number of bytes saved.
@@ -1853,18 +1430,16 @@ void array_container_copy(const array_container_t *src, array_container_t *dst);
void array_container_add_from_range(array_container_t *arr, uint32_t min,
uint32_t max, uint16_t step);
-
static inline bool array_container_empty(const array_container_t *array) {
return array->cardinality == 0;
}
/* check whether the cardinality is equal to the capacity (this does not mean
-* that it contains 1<<16 elements) */
+ * that it contains 1<<16 elements) */
static inline bool array_container_full(const array_container_t *array) {
return array->cardinality == array->capacity;
}
-
/* Compute the union of `src_1' and `src_2' and write the result to `dst'
* It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
void array_container_union(const array_container_t *src_1,
@@ -1884,8 +1459,7 @@ void array_container_intersection(const array_container_t *src_1,
/* Check whether src_1 and src_2 intersect. */
bool array_container_intersect(const array_container_t *src_1,
- const array_container_t *src_2);
-
+ const array_container_t *src_2);
/* computers the size of the intersection between two arrays.
*/
@@ -1912,8 +1486,8 @@ int array_container_to_uint32_array(void *vout, const array_container_t *cont,
/* Compute the number of runs */
int32_t array_container_number_of_runs(const array_container_t *ac);
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
-/*
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES/*
* Print this container using printf (useful for debugging).
*/
void array_container_printf(const array_container_t *v);
@@ -1925,6 +1499,8 @@ void array_container_printf(const array_container_t *v);
void array_container_printf_as_uint32_array(const array_container_t *v,
uint32_t base);
#endif
+
+bool array_container_validate(const array_container_t *v, const char **reason);
/**
* Return the serialized size in bytes of a container having cardinality "card".
@@ -1975,6 +1551,7 @@ int32_t array_container_read(int32_t cardinality, array_container_t *container,
* that the cardinality of the container is already known.
*
*/
+ALLOW_UNALIGNED
static inline int32_t array_container_size_in_bytes(
const array_container_t *container) {
return container->cardinality * sizeof(uint16_t);
@@ -1984,14 +1561,13 @@ static inline int32_t array_container_size_in_bytes(
* Return true if the two arrays have the same content.
*/
ALLOW_UNALIGNED
-static inline bool array_container_equals(
- const array_container_t *container1,
- const array_container_t *container2) {
-
+static inline bool array_container_equals(const array_container_t *container1,
+ const array_container_t *container2) {
if (container1->cardinality != container2->cardinality) {
return false;
}
- return memequals(container1->array, container2->array, container1->cardinality*2);
+ return memequals(container1->array, container2->array,
+ container1->cardinality * 2);
}
/**
@@ -2047,7 +1623,8 @@ static inline void array_container_append(array_container_t *arr,
* 0 -- value was already present
* -1 -- value was not added because cardinality would exceed max_cardinality
*/
-static inline int array_container_try_add(array_container_t *arr, uint16_t value,
+static inline int array_container_try_add(array_container_t *arr,
+ uint16_t value,
int32_t max_cardinality) {
const int32_t cardinality = arr->cardinality;
@@ -2102,12 +1679,13 @@ inline bool array_container_contains(const array_container_t *arr,
// return binarySearch(arr->array, arr->cardinality, pos) >= 0;
// binary search with fallback to linear search for short ranges
int32_t low = 0;
- int i;
- const uint16_t * carr = (const uint16_t *) arr->array;
+ const uint16_t *carr = (const uint16_t *)arr->array;
int32_t high = arr->cardinality - 1;
+ int i;
+
// while (high - low >= 0) {
- while(high >= low + 16) {
- int32_t middleIndex = (low + high)>>1;
+ while (high >= low + 16) {
+ int32_t middleIndex = (low + high) >> 1;
uint16_t middleValue = carr[middleIndex];
if (middleValue < pos) {
low = middleIndex + 1;
@@ -2118,27 +1696,27 @@ inline bool array_container_contains(const array_container_t *arr,
}
}
- for (i=low; i <= high; i++) {
+ for (i = low; i <= high; i++) {
uint16_t v = carr[i];
if (v == pos) {
return true;
}
- if ( v > pos ) return false;
+ if (v > pos) return false;
}
return false;
-
}
-void array_container_offset(const array_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset);
+void array_container_offset(const array_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset);
-//* Check whether a range of values from range_start (included) to range_end (excluded) is present. */
+/* Check whether a range of values from range_start (included) to range_end
+ * (excluded) is present. */
static inline bool array_container_contains_range(const array_container_t *arr,
- uint32_t range_start, uint32_t range_end) {
+ uint32_t range_start,
+ uint32_t range_end) {
const int32_t range_count = range_end - range_start;
- const uint16_t rs_included = range_start;
- const uint16_t re_included = range_end - 1;
+ const uint16_t rs_included = (uint16_t)range_start;
+ const uint16_t re_included = (uint16_t)(range_end - 1);
// Empty range is always included
if (range_count <= 0) {
@@ -2148,10 +1726,12 @@ static inline bool array_container_contains_range(const array_container_t *arr,
return false;
}
- const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included);
+ const int32_t start =
+ binarySearch(arr->array, arr->cardinality, rs_included);
// If this sorted array contains all items in the range:
// * the start item must be found
- // * the last item in range range_count must exist, and be the expected end value
+ // * the last item in range range_count must exist, and be the expected end
+ // value
return (start >= 0) && (arr->cardinality >= start + range_count) &&
(arr->array[start + range_count - 1] == re_included);
}
@@ -2179,15 +1759,54 @@ inline int array_container_rank(const array_container_t *arr, uint16_t x) {
}
}
-/* Returns the index of the first value equal or smaller than x, or -1 */
-inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
+/* bulk version of array_container_rank(); return number of consumed elements
+ */
+inline uint32_t array_container_rank_many(const array_container_t *arr,
+ uint64_t start_rank,
+ const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans) {
+ const uint16_t high = (uint16_t)((*begin) >> 16);
+ uint32_t pos = 0;
+ const uint32_t *iter = begin;
+ for (; iter != end; iter++) {
+ uint32_t x = *iter;
+ uint16_t xhigh = (uint16_t)(x >> 16);
+ if (xhigh != high) return iter - begin; // stop at next container
+
+ const int32_t idx =
+ binarySearch(arr->array + pos, arr->cardinality - pos, (uint16_t)x);
+ const bool is_present = idx >= 0;
+ if (is_present) {
+ *(ans++) = start_rank + pos + (idx + 1);
+ pos = idx + 1;
+ } else {
+ *(ans++) = start_rank + pos + (-idx - 1);
+ }
+ }
+ return iter - begin;
+}
+
+/* Returns the index of x, or -1 if x does not exist */
+inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
const bool is_present = idx >= 0;
if (is_present) {
return idx;
} else {
- int32_t candidate = - idx - 1;
- if(candidate < arr->cardinality) return candidate;
+ return -1;
+ }
+}
+
+/* Returns the index of the first value equal or larger than x, or -1 */
+inline int array_container_index_equalorlarger(const array_container_t *arr,
+ uint16_t x) {
+ const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
+ const bool is_present = idx >= 0;
+ if (is_present) {
+ return idx;
+ } else {
+ int32_t candidate = -idx - 1;
+ if (candidate < arr->cardinality) return candidate;
return -1;
}
}
@@ -2201,16 +1820,16 @@ static inline void array_container_add_range_nvals(array_container_t *array,
uint32_t min, uint32_t max,
int32_t nvals_less,
int32_t nvals_greater) {
- int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
- uint32_t i;
+ int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
+ uint32_t i;
if (union_cardinality > array->capacity) {
array_container_grow(array, union_cardinality, true);
}
memmove(&(array->array[union_cardinality - nvals_greater]),
&(array->array[array->cardinality - nvals_greater]),
nvals_greater * sizeof(uint16_t));
- for ( i = 0; i <= max - min; i++) {
- array->array[nvals_less + i] = min + i;
+ for (i = 0; i <= max - min; i++) {
+ array->array[nvals_less + i] = (uint16_t)(min + i);
}
array->cardinality = union_cardinality;
}
@@ -2221,9 +1840,10 @@ static inline void array_container_add_range_nvals(array_container_t *array,
*/
/*static inline void array_container_add_range(array_container_t *array,
uint32_t min, uint32_t max) {
- int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
- int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
- array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
+ int32_t nvals_greater = count_greater(array->array, array->cardinality,
+max); int32_t nvals_less = count_less(array->array, array->cardinality -
+nvals_greater, min); array_container_add_range_nvals(array, min, max,
+nvals_less, nvals_greater);
}*/
/*
@@ -2231,15 +1851,17 @@ static inline void array_container_add_range_nvals(array_container_t *array,
*/
static inline void array_container_remove_range(array_container_t *array,
uint32_t pos, uint32_t count) {
- if (count != 0) {
- memmove(&(array->array[pos]), &(array->array[pos+count]),
- (array->cardinality - pos - count) * sizeof(uint16_t));
- array->cardinality -= count;
- }
+ if (count != 0) {
+ memmove(&(array->array[pos]), &(array->array[pos + count]),
+ (array->cardinality - pos - count) * sizeof(uint16_t));
+ array->cardinality -= count;
+ }
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */
@@ -2257,9 +1879,11 @@ static inline void array_container_remove_range(array_container_t *array,
#include <stdint.h>
+// Include other headers after roaring_types.h
#ifdef __cplusplus
-extern "C" { namespace roaring {
+extern "C" {
+namespace roaring {
// Note: in pure C++ code, you should avoid putting `using` in header files
using api::roaring_iterator;
@@ -2268,8 +1892,6 @@ using api::roaring_iterator64;
namespace internal {
#endif
-
-
enum {
BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64,
BITSET_UNKNOWN_CARDINALITY = -1
@@ -2282,8 +1904,8 @@ STRUCT_CONTAINER(bitset_container_s) {
typedef struct bitset_container_s bitset_container_t;
-#define CAST_bitset(c) CAST(bitset_container_t *, c) // safer downcast
-#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
+#define CAST_bitset(c) CAST(bitset_container_t *, c) // safer downcast
+#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
#define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c)
/* Create a new bitset. Return NULL in case of failure. */
@@ -2431,28 +2053,29 @@ inline bool bitset_container_get(const bitset_container_t *bitset,
#endif
/*
-* Check if all bits are set in a range of positions from pos_start (included) to
-* pos_end (excluded).
-*/
+ * Check if all bits are set in a range of positions from pos_start (included)
+ * to pos_end (excluded).
+ */
static inline bool bitset_container_get_range(const bitset_container_t *bitset,
- uint32_t pos_start, uint32_t pos_end) {
-
+ uint32_t pos_start,
+ uint32_t pos_end) {
const uint32_t start = pos_start >> 6;
const uint32_t end = pos_end >> 6;
- uint16_t i;
+ uint32_t i;
const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1);
const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1;
- if (start == end) return ((bitset->words[end] & first & last) == (first & last));
+ if (start == end)
+ return ((bitset->words[end] & first & last) == (first & last));
if ((bitset->words[start] & first) != first) return false;
- if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->words[end] & last) != last)){
-
+ if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) &&
+ ((bitset->words[end] & last) != last)) {
return false;
}
- for (i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){
-
+ for (i = start + 1;
+ (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i) {
if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
}
@@ -2466,11 +2089,11 @@ inline bool bitset_container_contains(const bitset_container_t *bitset,
}
/*
-* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded)
-* is present in `bitset'. Calls bitset_container_get_all.
-*/
-static inline bool bitset_container_contains_range(const bitset_container_t *bitset,
- uint32_t pos_start, uint32_t pos_end) {
+ * Check whether a range of bits from position `pos_start' (included) to
+ * `pos_end' (excluded) is present in `bitset'. Calls
+ * bitset_container_get_range.
+ */
+static inline bool bitset_container_contains_range(
+ const bitset_container_t *bitset, uint32_t pos_start, uint32_t pos_end) {
return bitset_container_get_range(bitset, pos_start, pos_end);
}
@@ -2481,9 +2104,6 @@ static inline int bitset_container_cardinality(
return bitset->cardinality;
}
-
-
-
/* Copy one container into another. We assume that they are distinct. */
void bitset_container_copy(const bitset_container_t *source,
bitset_container_t *dest);
@@ -2500,21 +2120,19 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset);
/* Check whether this bitset is empty,
* it never modifies the bitset struct. */
-static inline bool bitset_container_empty(
- const bitset_container_t *bitset) {
- if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
- int i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
- if((bitset->words[i]) != 0) return false;
- }
- return true;
- }
- return bitset->cardinality == 0;
+static inline bool bitset_container_empty(const bitset_container_t *bitset) {
+ if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
+ int i;
+ for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) {
+ if ((bitset->words[i]) != 0) return false;
+ }
+ return true;
+ }
+ return bitset->cardinality == 0;
}
-
-/* Get whether there is at least one bit set (see bitset_container_empty for the reverse),
- the bitset is never modified */
+/* Get whether there is at least one bit set (see bitset_container_empty for
+ the reverse), the bitset is never modified */
static inline bool bitset_container_const_nonzero_cardinality(
const bitset_container_t *bitset) {
return !bitset_container_empty(bitset);
@@ -2524,7 +2142,7 @@ static inline bool bitset_container_const_nonzero_cardinality(
* Check whether the two bitsets intersect
*/
bool bitset_container_intersect(const bitset_container_t *src_1,
- const bitset_container_t *src_2);
+ const bitset_container_t *src_2);
/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
* cardinality. */
@@ -2548,6 +2166,12 @@ int bitset_container_union(const bitset_container_t *src_1,
int bitset_container_union_justcard(const bitset_container_t *src_1,
const bitset_container_t *src_2);
+/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_union_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
* update the cardinality. Provided to optimize chained operations. */
int bitset_container_or_nocard(const bitset_container_t *src_1,
@@ -2578,6 +2202,12 @@ int bitset_container_intersection_justcard(const bitset_container_t *src_1,
/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
* not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_intersection_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
int bitset_container_and_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
@@ -2616,9 +2246,8 @@ int bitset_container_andnot_nocard(const bitset_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst);
-void bitset_container_offset(const bitset_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset);
+void bitset_container_offset(const bitset_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset);
/*
* Write out the 16-bit integers contained in this container as a list of 32-bit
* integers using base
@@ -2633,7 +2262,7 @@ int bitset_container_to_uint32_array(uint32_t *out,
const bitset_container_t *bc,
uint32_t base);
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+ #ifdef NDPI_ENABLE_DEBUG_MESSAGES
/*
* Print this container using printf (useful for debugging).
*/
@@ -2646,6 +2275,9 @@ void bitset_container_printf(const bitset_container_t *v);
void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
uint32_t base);
#endif
+
+bool bitset_container_validate(const bitset_container_t *v,
+ const char **reason);
/**
* Return the serialized size in bytes of a container.
@@ -2703,8 +2335,8 @@ bool bitset_container_equals(const bitset_container_t *container1,
const bitset_container_t *container2);
/**
-* Return true if container1 is a subset of container2.
-*/
+ * Return true if container1 is a subset of container2.
+ */
bool bitset_container_is_subset(const bitset_container_t *container1,
const bitset_container_t *container2);
@@ -2727,11 +2359,23 @@ uint16_t bitset_container_maximum(const bitset_container_t *container);
/* Returns the number of values equal or smaller than x */
int bitset_container_rank(const bitset_container_t *container, uint16_t x);
+/* bulk version of bitset_container_rank(); return number of consumed elements
+ */
+uint32_t bitset_container_rank_many(const bitset_container_t *container,
+ uint64_t start_rank, const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans);
+
+/* Returns the index of x, or -1 if x does not exist */
+int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
+
/* Returns the index of the first value equal or larger than x, or -1 */
-int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
+int bitset_container_index_equalorlarger(const bitset_container_t *container,
+ uint16_t x);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_BITSET_H_ */
@@ -2745,15 +2389,17 @@ int bitset_container_index_equalorlarger(const bitset_container_t *container, ui
#ifndef INCLUDE_CONTAINERS_RUN_H_
#define INCLUDE_CONTAINERS_RUN_H_
+
+// Include other headers after roaring_types.h
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
-
#ifdef __cplusplus
-extern "C" { namespace roaring {
+extern "C" {
+namespace roaring {
// Note: in pure C++ code, you should avoid putting `using` in header files
using api::roaring_iterator;
@@ -2778,11 +2424,11 @@ struct rle16_s {
typedef struct rle16_s rle16_t;
#ifdef __cplusplus
- #define MAKE_RLE16(val,len) \
- {(uint16_t)(val), (uint16_t)(len)} // no tagged structs until c++20
+#define MAKE_RLE16(val, len) \
+ { (uint16_t)(val), (uint16_t)(len) } // no tagged structs until c++20
#else
- #define MAKE_RLE16(val,len) \
- (rle16_t){.value = (uint16_t)(val), .length = (uint16_t)(len)}
+#define MAKE_RLE16(val, len) \
+ (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
#endif
/* struct run_container_s - run container bitmap
@@ -2799,8 +2445,8 @@ STRUCT_CONTAINER(run_container_s) {
typedef struct run_container_s run_container_t;
-#define CAST_run(c) CAST(run_container_t *, c) // safer downcast
-#define const_CAST_run(c) CAST(const run_container_t *, c)
+#define CAST_run(c) CAST(run_container_t *, c) // safer downcast
+#define const_CAST_run(c) CAST(const run_container_t *, c)
#define movable_CAST_run(c) movable_CAST(run_container_t **, c)
/* Create a new run container. Return NULL in case of failure. */
@@ -2873,13 +2519,12 @@ static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray,
return -(low + 1);
}
-
/**
* Returns number of runs which can'be be merged with the key because they
* are less than the key.
* Note that [5,6,7,8] can be merged with the key 9 and won't be counted.
*/
-static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray,
+static inline int32_t rle16_count_less(const rle16_t *array, int32_t lenarray,
uint16_t key) {
if (lenarray == 0) return 0;
int32_t low = 0;
@@ -2887,8 +2532,9 @@ static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray,
while (low <= high) {
int32_t middleIndex = (low + high) >> 1;
uint16_t min_value = array[middleIndex].value;
- uint16_t max_value = array[middleIndex].value + array[middleIndex].length;
- if (max_value + UINT32_C(1) < key) { // uint32 arithmetic
+ uint16_t max_value =
+ array[middleIndex].value + array[middleIndex].length;
+ if (max_value + UINT32_C(1) < key) { // uint32 arithmetic
low = middleIndex + 1;
} else if (key < min_value) {
high = middleIndex - 1;
@@ -2899,18 +2545,19 @@ static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray,
return low;
}
-static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray,
- uint16_t key) {
+static inline int32_t rle16_count_greater(const rle16_t *array,
+ int32_t lenarray, uint16_t key) {
if (lenarray == 0) return 0;
int32_t low = 0;
int32_t high = lenarray - 1;
while (low <= high) {
int32_t middleIndex = (low + high) >> 1;
uint16_t min_value = array[middleIndex].value;
- uint16_t max_value = array[middleIndex].value + array[middleIndex].length;
+ uint16_t max_value =
+ array[middleIndex].value + array[middleIndex].length;
if (max_value < key) {
low = middleIndex + 1;
- } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic
+ } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic
high = middleIndex - 1;
} else {
return lenarray - (middleIndex + 1);
@@ -2994,16 +2641,20 @@ inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
}
/*
-* Check whether all positions in a range of positions from pos_start (included)
-* to pos_end (excluded) is present in `run'.
-*/
+ * Check whether all positions in a range of positions from pos_start (included)
+ * to pos_end (excluded) are present in `run'.
+ */
static inline bool run_container_contains_range(const run_container_t *run,
- uint32_t pos_start, uint32_t pos_end) {
+ uint32_t pos_start,
+ uint32_t pos_end) {
uint32_t count = 0;
- int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start), i;
+ int32_t i;
+ int32_t index =
+ interleavedBinarySearch(run->runs, run->n_runs, (uint16_t)pos_start);
if (index < 0) {
index = -index - 2;
- if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){
+ if ((index == -1) ||
+ ((pos_start - run->runs[index].value) > run->runs[index].length)) {
return false;
}
}
@@ -3011,7 +2662,9 @@ static inline bool run_container_contains_range(const run_container_t *run,
const uint32_t stop = run->runs[i].value + run->runs[i].length;
if (run->runs[i].value >= pos_end) break;
if (stop >= pos_end) {
- count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0);
+ count += (((pos_end - run->runs[i].value) > 0)
+ ? (pos_end - run->runs[i].value)
+ : 0);
break;
}
const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0;
@@ -3030,13 +2683,10 @@ static inline bool run_container_nonzero_cardinality(
}
/* Card == 0?, see run_container_nonzero_cardinality for the reverse */
-static inline bool run_container_empty(
- const run_container_t *run) {
+static inline bool run_container_empty(const run_container_t *run) {
return run->n_runs == 0; // runs never empty
}
-
-
/* Copy one container into another. We assume that they are distinct. */
void run_container_copy(const run_container_t *src, run_container_t *dst);
@@ -3139,7 +2789,7 @@ int run_container_intersection_cardinality(const run_container_t *src_1,
/* Check whether src_1 and src_2 intersect. */
bool run_container_intersect(const run_container_t *src_1,
- const run_container_t *src_2);
+ const run_container_t *src_2);
/* Compute the symmetric difference of `src_1' and `src_2' and write the result
* to `dst'
@@ -3171,6 +2821,8 @@ void run_container_printf(const run_container_t *v);
void run_container_printf_as_uint32_array(const run_container_t *v,
uint32_t base);
#endif
+
+bool run_container_validate(const run_container_t *run, const char **reason);
/**
* Return the serialized size in bytes of a container having "num_runs" runs.
@@ -3210,6 +2862,7 @@ int32_t run_container_read(int32_t cardinality, run_container_t *container,
* Return the serialized size in bytes of a container (see run_container_write).
* This is meant to be compatible with the Java and Go versions of Roaring.
*/
+ALLOW_UNALIGNED
static inline int32_t run_container_size_in_bytes(
const run_container_t *container) {
return run_container_serialized_size_in_bytes(container->n_runs);
@@ -3220,7 +2873,7 @@ static inline int32_t run_container_size_in_bytes(
*/
ALLOW_UNALIGNED
static inline bool run_container_equals(const run_container_t *container1,
- const run_container_t *container2) {
+ const run_container_t *container2) {
if (container1->n_runs != container2->n_runs) {
return false;
}
@@ -3229,8 +2882,8 @@ static inline bool run_container_equals(const run_container_t *container1,
}
/**
-* Return true if container1 is a subset of container2.
-*/
+ * Return true if container1 is a subset of container2.
+ */
bool run_container_is_subset(const run_container_t *container1,
const run_container_t *container2);
@@ -3243,12 +2896,12 @@ void run_container_smart_append_exclusive(run_container_t *src,
const uint16_t length);
/**
-* The new container consists of a single run [start,stop).
-* It is required that stop>start, the caller is responsability for this check.
-* It is required that stop <= (1<<16), the caller is responsability for this check.
-* The cardinality of the created container is stop - start.
-* Returns NULL on failure
-*/
+ * The new container consists of a single run [start,stop).
+ * It is required that stop>start, the caller is responsible for this check.
+ * It is required that stop <= (1<<16), the caller is responsible for this
+ * check. The cardinality of the created container is stop - start. Returns NULL
+ * on failure
+ */
static inline run_container_t *run_container_create_range(uint32_t start,
uint32_t stop) {
run_container_t *rc = run_container_create_given_capacity(1);
@@ -3277,9 +2930,8 @@ bool run_container_select(const run_container_t *container,
void run_container_andnot(const run_container_t *src_1,
const run_container_t *src_2, run_container_t *dst);
-void run_container_offset(const run_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset);
+void run_container_offset(const run_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset);
/* Returns the smallest value (assumes not empty) */
inline uint16_t run_container_minimum(const run_container_t *run) {
@@ -3296,8 +2948,18 @@ inline uint16_t run_container_maximum(const run_container_t *run) {
/* Returns the number of values equal or smaller than x */
int run_container_rank(const run_container_t *arr, uint16_t x);
-/* Returns the index of the first run containing a value at least as large as x, or -1 */
-inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
+/* bulk version of run_container_rank(); return number of consumed elements */
+uint32_t run_container_rank_many(const run_container_t *arr,
+ uint64_t start_rank, const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans);
+
+/* Returns the index of x, or -1 if x does not exist */
+int run_container_get_index(const run_container_t *arr, uint16_t x);
+
+/* Returns the index of the first run containing a value at least as large as x,
+ * or -1 */
+inline int run_container_index_equalorlarger(const run_container_t *arr,
+ uint16_t x) {
int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
if (index >= 0) return index;
index = -index - 2; // points to preceding run, possibly -1
@@ -3307,8 +2969,8 @@ inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_
if (offset <= le) return index;
}
index += 1;
- if(index < arr->n_runs) {
- return index;
+ if (index < arr->n_runs) {
+ return index;
}
return -1;
}
@@ -3316,15 +2978,15 @@ inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_
/*
* Add all values in range [min, max] using hint.
*/
-static inline void run_container_add_range_nruns(run_container_t* run,
+static inline void run_container_add_range_nruns(run_container_t *run,
uint32_t min, uint32_t max,
int32_t nruns_less,
int32_t nruns_greater) {
int32_t nruns_common = run->n_runs - nruns_less - nruns_greater;
if (nruns_common == 0) {
- makeRoomAtIndex(run, nruns_less);
- run->runs[nruns_less].value = min;
- run->runs[nruns_less].length = max - min;
+ makeRoomAtIndex(run, (uint16_t)nruns_less);
+ run->runs[nruns_less].value = (uint16_t)min;
+ run->runs[nruns_less].length = (uint16_t)(max - min);
} else {
uint32_t common_min = run->runs[nruns_less].value;
uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value +
@@ -3332,12 +2994,12 @@ static inline void run_container_add_range_nruns(run_container_t* run,
uint32_t result_min = (common_min < min) ? common_min : min;
uint32_t result_max = (common_max > max) ? common_max : max;
- run->runs[nruns_less].value = result_min;
- run->runs[nruns_less].length = result_max - result_min;
+ run->runs[nruns_less].value = (uint16_t)result_min;
+ run->runs[nruns_less].length = (uint16_t)(result_max - result_min);
memmove(&(run->runs[nruns_less + 1]),
&(run->runs[run->n_runs - nruns_greater]),
- nruns_greater*sizeof(rle16_t));
+ nruns_greater * sizeof(rle16_t));
run->n_runs = nruns_less + 1 + nruns_greater;
}
}
@@ -3349,44 +3011,52 @@ static inline void run_container_add_range_nruns(run_container_t* run,
/*static inline void run_container_add_range(run_container_t* run,
uint32_t min, uint32_t max) {
int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
- int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
- run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+ int32_t nruns_less = rle16_count_less(run->runs, run->n_runs -
+nruns_greater, min); run_container_add_range_nruns(run, min, max, nruns_less,
+nruns_greater);
}*/
/**
- * Shifts last $count elements either left (distance < 0) or right (distance > 0)
+ * Shifts last $count elements either left (distance < 0) or right (distance >
+ * 0)
*/
-static inline void run_container_shift_tail(run_container_t* run,
- int32_t count, int32_t distance) {
+static inline void run_container_shift_tail(run_container_t *run, int32_t count,
+ int32_t distance) {
if (distance > 0) {
- if (run->capacity < count+distance) {
- run_container_grow(run, count+distance, true);
+ if (run->capacity < count + distance) {
+ run_container_grow(run, count + distance, true);
}
}
int32_t srcpos = run->n_runs - count;
int32_t dstpos = srcpos + distance;
- memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count);
+ memmove(&(run->runs[dstpos]), &(run->runs[srcpos]),
+ sizeof(rle16_t) * count);
run->n_runs += distance;
}
/**
* Remove all elements in range [min, max]
*/
-static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) {
- int32_t first = rle16_find_run(run->runs, run->n_runs, min);
- int32_t last = rle16_find_run(run->runs, run->n_runs, max);
+static inline void run_container_remove_range(run_container_t *run,
+ uint32_t min, uint32_t max) {
+ int32_t first = rle16_find_run(run->runs, run->n_runs, (uint16_t)min);
+ int32_t last = rle16_find_run(run->runs, run->n_runs, (uint16_t)max);
if (first >= 0 && min > run->runs[first].value &&
- max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) {
+ max < ((uint32_t)run->runs[first].value +
+ (uint32_t)run->runs[first].length)) {
// split this run into two adjacent runs
// right subinterval
- makeRoomAtIndex(run, first+1);
- run->runs[first+1].value = max + 1;
- run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1);
+ makeRoomAtIndex(run, (uint16_t)(first + 1));
+ run->runs[first + 1].value = (uint16_t)(max + 1);
+ run->runs[first + 1].length =
+ (uint16_t)((run->runs[first].value + run->runs[first].length) -
+ (max + 1));
// left subinterval
- run->runs[first].length = (min - 1) - run->runs[first].value;
+ run->runs[first].length =
+ (uint16_t)((min - 1) - run->runs[first].value);
return;
}
@@ -3394,33 +3064,37 @@ static inline void run_container_remove_range(run_container_t *run, uint32_t min
// update left-most partial run
if (first >= 0) {
if (min > run->runs[first].value) {
- run->runs[first].length = (min - 1) - run->runs[first].value;
+ run->runs[first].length =
+ (uint16_t)((min - 1) - run->runs[first].value);
first++;
}
} else {
- first = -first-1;
+ first = -first - 1;
}
// update right-most run
if (last >= 0) {
uint16_t run_max = run->runs[last].value + run->runs[last].length;
if (run_max > max) {
- run->runs[last].value = max + 1;
- run->runs[last].length = run_max - (max + 1);
+ run->runs[last].value = (uint16_t)(max + 1);
+ run->runs[last].length = (uint16_t)(run_max - (max + 1));
last--;
}
} else {
- last = (-last-1) - 1;
+ last = (-last - 1) - 1;
}
// remove intermediate runs
if (first <= last) {
- run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1));
+ run_container_shift_tail(run, run->n_runs - (last + 1),
+ -(last - first + 1));
}
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_RUN_H_ */
@@ -3436,7 +3110,9 @@ static inline void run_container_remove_range(run_container_t *run, uint32_t min
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Convert an array into a bitset. The input container is not freed or modified.
@@ -3459,39 +3135,38 @@ run_container_t *run_container_from_array(const array_container_t *c);
/* convert a run into either an array or a bitset
* might free the container. This does not free the input run container. */
-container_t *convert_to_bitset_or_array_container(
- run_container_t *rc, int32_t card,
- uint8_t *resulttype);
+container_t *convert_to_bitset_or_array_container(run_container_t *rc,
+ int32_t card,
+ uint8_t *resulttype);
/* convert containers to and from runcontainers, as is most space efficient.
* The container might be freed. */
-container_t *convert_run_optimize(
- container_t *c, uint8_t typecode_original,
- uint8_t *typecode_after);
+container_t *convert_run_optimize(container_t *c, uint8_t typecode_original,
+ uint8_t *typecode_after);
/* converts a run container to either an array or a bitset, IF it saves space.
*/
/* If a conversion occurs, the caller is responsible to free the original
* container and
* he becomes reponsible to free the new one. */
-container_t *convert_run_to_efficient_container(
- run_container_t *c, uint8_t *typecode_after);
+container_t *convert_run_to_efficient_container(run_container_t *c,
+ uint8_t *typecode_after);
// like convert_run_to_efficient_container but frees the old result if needed
container_t *convert_run_to_efficient_container_and_free(
- run_container_t *c, uint8_t *typecode_after);
+ run_container_t *c, uint8_t *typecode_after);
/**
* Create new container which is a union of run container and
* range [min, max]. Caller is responsible for freeing run container.
*/
-container_t *container_from_run_range(
- const run_container_t *run,
- uint32_t min, uint32_t max,
- uint8_t *typecode_after);
+container_t *container_from_run_range(const run_container_t *run, uint32_t min,
+ uint32_t max, uint8_t *typecode_after);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */
@@ -3507,7 +3182,9 @@ container_t *container_from_run_range(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/**
@@ -3528,7 +3205,9 @@ bool run_container_equals_bitset(const run_container_t* container1,
const bitset_container_t* container2);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* CONTAINERS_MIXED_EQUAL_H_ */
@@ -3544,7 +3223,9 @@ bool run_container_equals_bitset(const run_container_t* container1,
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/**
@@ -3554,31 +3235,33 @@ bool array_container_is_subset_bitset(const array_container_t* container1,
const bitset_container_t* container2);
/**
-* Return true if container1 is a subset of container2.
+ * Return true if container1 is a subset of container2.
*/
bool run_container_is_subset_array(const run_container_t* container1,
const array_container_t* container2);
/**
-* Return true if container1 is a subset of container2.
+ * Return true if container1 is a subset of container2.
*/
bool array_container_is_subset_run(const array_container_t* container1,
const run_container_t* container2);
/**
-* Return true if container1 is a subset of container2.
+ * Return true if container1 is a subset of container2.
*/
bool run_container_is_subset_bitset(const run_container_t* container1,
const bitset_container_t* container2);
/**
-* Return true if container1 is a subset of container2.
-*/
+ * Return true if container1 is a subset of container2.
+ */
bool bitset_container_is_subset_run(const bitset_container_t* container1,
const run_container_t* container2);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* CONTAINERS_MIXED_SUBSET_H_ */
@@ -3592,7 +3275,9 @@ bool bitset_container_is_subset_run(const bitset_container_t* container1,
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the andnot of src_1 and src_2 and write the result to
@@ -3612,9 +3297,9 @@ void array_bitset_container_iandnot(array_container_t *src_1,
* Return true for a bitset result; false for array
*/
-bool bitset_array_container_andnot(
- const bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool bitset_array_container_andnot(const bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst (which has no container initially). It will modify src_1
@@ -3623,9 +3308,9 @@ bool bitset_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_array_container_iandnot(
- bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool bitset_array_container_iandnot(bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst. Result may be either a bitset or an array container
@@ -3634,9 +3319,9 @@ bool bitset_array_container_iandnot(
* result true) or an array container.
*/
-bool run_bitset_container_andnot(
- const run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool run_bitset_container_andnot(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst. Result may be either a bitset or an array container
@@ -3645,9 +3330,9 @@ bool run_bitset_container_andnot(
* result true) or an array container.
*/
-bool run_bitset_container_iandnot(
- run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool run_bitset_container_iandnot(run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst. Result may be either a bitset or an array container
@@ -3656,9 +3341,9 @@ bool run_bitset_container_iandnot(
* result true) or an array container.
*/
-bool bitset_run_container_andnot(
- const bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+bool bitset_run_container_andnot(const bitset_container_t *src_1,
+ const run_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst (which has no container initially). It will modify src_1
@@ -3667,17 +3352,17 @@ bool bitset_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_run_container_iandnot(
- bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+bool bitset_run_container_iandnot(bitset_container_t *src_1,
+ const run_container_t *src_2,
+ container_t **dst);
/* dst does not indicate a valid container initially. Eventually it
* can become any type of container.
*/
-int run_array_container_andnot(
- const run_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+int run_array_container_andnot(const run_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst (which has no container initially). It will modify src_1
@@ -3686,9 +3371,9 @@ int run_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-int run_array_container_iandnot(
- run_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+int run_array_container_iandnot(run_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* dst must be a valid array container, allowed to be src_1 */
@@ -3707,9 +3392,8 @@ void array_run_container_iandnot(array_container_t *src_1,
* can become any kind of container.
*/
-int run_run_container_andnot(
- const run_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int run_run_container_andnot(const run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst (which has no container initially). It will modify src_1
@@ -3718,9 +3402,8 @@ int run_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-int run_run_container_iandnot(
- run_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int run_run_container_iandnot(run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
/*
* dst is a valid array container and may be the same as src_1
@@ -3740,9 +3423,9 @@ void array_array_container_iandnot(array_container_t *src_1,
* "dst is a bitset"
*/
-bool bitset_bitset_container_andnot(
- const bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the andnot of src_1 and src_2 and write the result to
* dst (which has no container initially). It will modify src_1
@@ -3751,12 +3434,14 @@ bool bitset_bitset_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_bitset_container_iandnot(
- bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif
@@ -3777,7 +3462,9 @@ bool bitset_bitset_container_iandnot(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the intersection of src_1 and src_2 and write the result to
@@ -3791,11 +3478,9 @@ void array_bitset_container_intersection(const array_container_t *src_1,
int array_bitset_container_intersection_cardinality(
const array_container_t *src_1, const bitset_container_t *src_2);
-
-
/* Checking whether src_1 and src_2 intersect. */
bool array_bitset_container_intersect(const array_container_t *src_1,
- const bitset_container_t *src_2);
+ const bitset_container_t *src_2);
/*
* Compute the intersection between src_1 and src_2 and write the result
@@ -3829,18 +3514,17 @@ int array_run_container_intersection_cardinality(const array_container_t *src_1,
/* Compute the size of the intersection between src_1 and src_2
**/
-int run_bitset_container_intersection_cardinality(const run_container_t *src_1,
- const bitset_container_t *src_2);
-
+int run_bitset_container_intersection_cardinality(
+ const run_container_t *src_1, const bitset_container_t *src_2);
/* Check that src_1 and src_2 intersect. */
bool array_run_container_intersect(const array_container_t *src_1,
- const run_container_t *src_2);
+ const run_container_t *src_2);
/* Check that src_1 and src_2 intersect.
**/
bool run_bitset_container_intersect(const run_container_t *src_1,
- const bitset_container_t *src_2);
+ const bitset_container_t *src_2);
/*
* Same as bitset_bitset_container_intersection except that if the output is to
@@ -3855,7 +3539,9 @@ bool bitset_bitset_container_intersection_inplace(
container_t **dst);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */
@@ -3871,7 +3557,9 @@ bool bitset_bitset_container_intersection_inplace(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Negation across the entire range of the container.
@@ -3891,9 +3579,8 @@ void array_container_negation(const array_container_t *src,
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-bool bitset_container_negation(
- const bitset_container_t *src,
- container_t **dst);
+bool bitset_container_negation(const bitset_container_t *src,
+ container_t **dst);
/* inplace version */
/*
@@ -3904,9 +3591,8 @@ bool bitset_container_negation(
* to free the container.
* In all cases, the result is in *dst.
*/
-bool bitset_container_negation_inplace(
- bitset_container_t *src,
- container_t **dst);
+bool bitset_container_negation_inplace(bitset_container_t *src,
+ container_t **dst);
/* Negation across the entire range of container
* Compute the negation of src and write the result
@@ -3931,19 +3617,18 @@ int run_container_negation_inplace(run_container_t *src, container_t **dst);
* to *dst. Returns true if the result is a bitset container
* and false for an array container. *dst is not preallocated.
*/
-bool array_container_negation_range(
- const array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+bool array_container_negation_range(const array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
/* Even when the result would fit, it is unclear how to make an
* inplace version without inefficient copying. Thus this routine
* may be a wrapper for the non-in-place version
*/
-bool array_container_negation_range_inplace(
- array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+bool array_container_negation_range_inplace(array_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst);
/* Negation across a range of the container
* Compute the negation of src and write the result
@@ -3952,10 +3637,9 @@ bool array_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-bool bitset_container_negation_range(
- const bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+bool bitset_container_negation_range(const bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
/* inplace version */
/*
@@ -3966,10 +3650,10 @@ bool bitset_container_negation_range(
* to free the container.
* In all cases, the result is in *dst.
*/
-bool bitset_container_negation_range_inplace(
- bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+bool bitset_container_negation_range_inplace(bitset_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst);
/* Negation across a range of container
* Compute the negation of src and write the result
@@ -3977,10 +3661,9 @@ bool bitset_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-int run_container_negation_range(
- const run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+int run_container_negation_range(const run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
/*
* Same as run_container_negation except that if the output is to
@@ -3989,13 +3672,15 @@ int run_container_negation_range(
* then src is modified and no allocation is made.
* In all cases, the result is in *dst.
*/
-int run_container_negation_range_inplace(
- run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst);
+int run_container_negation_range_inplace(run_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */
@@ -4016,7 +3701,9 @@ int run_container_negation_range_inplace(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the union of src_1 and src_2 and write the result to
@@ -4038,9 +3725,9 @@ void array_bitset_container_lazy_union(const array_container_t *src_1,
* otherwise is a array_container_t. We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-bool array_array_container_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_union(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/*
* Compute the union between src_1 and src_2 and write the result
@@ -4048,27 +3735,28 @@ bool array_array_container_union(
* the result is a bitset_container_t
* otherwise is a array_container_t. When the result is an array_container_t, it
* it either written to src_1 (if *dst is null) or to *dst.
- * If the result is a bitset_container_t and *dst is null, then there was a failure.
+ * If the result is a bitset_container_t and *dst is null, then there was a
+ * failure.
*/
-bool array_array_container_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_inplace_union(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/*
* Same as array_array_container_union except that it will more eagerly produce
* a bitset.
*/
-bool array_array_container_lazy_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_lazy_union(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/*
- * Same as array_array_container_inplace_union except that it will more eagerly produce
- * a bitset.
+ * Same as array_array_container_inplace_union except that it will more eagerly
+ * produce a bitset.
*/
-bool array_array_container_lazy_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_lazy_inplace_union(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* Compute the union of src_1 and src_2 and write the result to
* dst. We assume that dst is a
@@ -4106,7 +3794,9 @@ void run_bitset_container_lazy_union(const run_container_t *src_1,
bitset_container_t *dst);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */
@@ -4134,15 +3824,17 @@ void run_bitset_container_lazy_union(const run_container_t *src_1,
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the xor of src_1 and src_2 and write the result to
* dst (which has no container initially).
* Result is true iff dst is a bitset */
-bool array_bitset_container_xor(
- const array_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool array_bitset_container_xor(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the xor of src_1 and src_2 and write the result to
* dst. It is allowed for src_2 to be dst. This version does not
@@ -4157,9 +3849,9 @@ void array_bitset_container_lazy_xor(const array_container_t *src_1,
* "dst is a bitset"
*/
-bool bitset_bitset_container_xor(
- const bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool bitset_bitset_container_xor(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the xor of src_1 and src_2 and write the result to
* dst. Result may be either a bitset or an array container
@@ -4168,9 +3860,9 @@ bool bitset_bitset_container_xor(
* result true) or an array container.
*/
-bool run_bitset_container_xor(
- const run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool run_bitset_container_xor(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* lazy xor. Dst is initialized and may be equal to src_2.
* Result is left as a bitset container, even if actual
@@ -4185,17 +3877,16 @@ void run_bitset_container_lazy_xor(const run_container_t *src_1,
* can become any kind of container.
*/
-int array_run_container_xor(
- const array_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int array_run_container_xor(const array_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
/* dst does not initially have a valid container. Creates either
* an array or a bitset container, indicated by return code
*/
-bool array_array_container_xor(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_xor(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* dst does not initially have a valid container. Creates either
* an array or a bitset container, indicated by return code.
@@ -4203,9 +3894,9 @@ bool array_array_container_xor(
* container type might not be correct for the actual cardinality
*/
-bool array_array_container_lazy_xor(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_lazy_xor(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
/* Dst is a valid run container. (Can it be src_2? Let's say not.)
* Leaves result as run container, even if other options are
@@ -4220,9 +3911,8 @@ void array_run_container_lazy_xor(const array_container_t *src_1,
* can become any kind of container.
*/
-int run_run_container_xor(
- const run_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int run_run_container_xor(const run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
/* INPLACE versions (initial implementation may not exploit all inplace
* opportunities (if any...)
@@ -4235,17 +3925,17 @@ int run_run_container_xor(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_array_container_ixor(
- bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool bitset_array_container_ixor(bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
-bool bitset_bitset_container_ixor(
- bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool bitset_bitset_container_ixor(bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
-bool array_bitset_container_ixor(
- array_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool array_bitset_container_ixor(array_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
/* Compute the xor of src_1 and src_2 and write the result to
* dst. Result may be either a bitset or an array container
@@ -4254,36 +3944,34 @@ bool array_bitset_container_ixor(
* result true) or an array container.
*/
-bool run_bitset_container_ixor(
- run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst);
+bool run_bitset_container_ixor(run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
-bool bitset_run_container_ixor(
- bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+bool bitset_run_container_ixor(bitset_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
/* dst does not indicate a valid container initially. Eventually it
* can become any kind of container.
*/
-int array_run_container_ixor(
- array_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int array_run_container_ixor(array_container_t *src_1,
+ const run_container_t *src_2, container_t **dst);
-int run_array_container_ixor(
- run_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+int run_array_container_ixor(run_container_t *src_1,
+ const array_container_t *src_2, container_t **dst);
-bool array_array_container_ixor(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst);
+bool array_array_container_ixor(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst);
-int run_run_container_ixor(
- run_container_t *src_1, const run_container_t *src_2,
- container_t **dst);
+int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif
@@ -4296,15 +3984,10 @@ int run_run_container_ixor(
#include <stdbool.h>
#include <stdio.h>
-#ifndef WIN32
-#include "ndpi_config.h"
-
-#define NDPI_REPLACE_FPRINTF
-#include "../../ndpi_replace_printf.h"
-#endif
-
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
// would enum be possible or better?
@@ -4330,27 +4013,25 @@ extern "C" { namespace roaring { namespace internal {
* ...
* }
*/
-#define PAIR_CONTAINER_TYPES(type1,type2) \
- (4 * (type1) + (type2))
+#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
-#define CONTAINER_PAIR(name1,name2) \
+#define CONTAINER_PAIR(name1, name2) \
(4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
/**
* A shared container is a wrapper around a container
* with reference counting.
*/
-
STRUCT_CONTAINER(shared_container_s) {
container_t *container;
uint8_t typecode;
- uint32_t counter; // to be managed atomically
+ croaring_refcount_t counter; // to be managed atomically
};
typedef struct shared_container_s shared_container_t;
-#define CAST_shared(c) CAST(shared_container_t *, c) // safer downcast
-#define const_CAST_shared(c) CAST(const shared_container_t *, c)
+#define CAST_shared(c) CAST(shared_container_t *, c) // safer downcast
+#define const_CAST_shared(c) CAST(const shared_container_t *, c)
#define movable_CAST_shared(c) movable_CAST(shared_container_t **, c)
/*
@@ -4376,8 +4057,7 @@ container_t *shared_container_extract_copy(shared_container_t *container,
/* access to container underneath */
static inline const container_t *container_unwrap_shared(
- const container_t *candidate_shared_container, uint8_t *type
-){
+ const container_t *candidate_shared_container, uint8_t *type) {
if (*type == SHARED_CONTAINER_TYPE) {
*type = const_CAST_shared(candidate_shared_container)->typecode;
assert(*type != SHARED_CONTAINER_TYPE);
@@ -4387,11 +4067,9 @@ static inline const container_t *container_unwrap_shared(
}
}
-
/* access to container underneath */
-static inline container_t *container_mutable_unwrap_shared(
- container_t *c, uint8_t *type
-) {
+static inline container_t *container_mutable_unwrap_shared(container_t *c,
+ uint8_t *type) {
if (*type == SHARED_CONTAINER_TYPE) { // the passed in container is shared
*type = CAST_shared(c)->typecode;
assert(*type != SHARED_CONTAINER_TYPE);
@@ -4402,9 +4080,7 @@ static inline container_t *container_mutable_unwrap_shared(
}
/* access to container underneath and queries its type */
-static inline uint8_t get_container_type(
- const container_t *c, uint8_t type
-){
+static inline uint8_t get_container_type(const container_t *c, uint8_t type) {
if (type == SHARED_CONTAINER_TYPE) {
return const_CAST_shared(c)->typecode;
} else {
@@ -4420,9 +4096,8 @@ static inline uint8_t get_container_type(
container_t *container_clone(const container_t *container, uint8_t typecode);
/* access to container underneath, cloning it if needed */
-static inline container_t *get_writable_copy_if_shared(
- container_t *c, uint8_t *type
-){
+static inline container_t *get_writable_copy_if_shared(container_t *c,
+ uint8_t *type) {
if (*type == SHARED_CONTAINER_TYPE) { // shared, return enclosed container
return shared_container_extract_copy(CAST_shared(c), type);
} else {
@@ -4442,9 +4117,8 @@ static const char *shared_container_names[] = {
// if a new container is produced, caller responsible for freeing the previous
// one
// container should not be a shared container
-static inline bitset_container_t *container_to_bitset(
- container_t *c, uint8_t typecode
-){
+static inline bitset_container_t *container_to_bitset(container_t *c,
+ uint8_t typecode) {
bitset_container_t *result = NULL;
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4457,9 +4131,10 @@ static inline bitset_container_t *container_to_bitset(
return result;
case SHARED_CONTAINER_TYPE:
assert(false);
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
@@ -4479,14 +4154,13 @@ static inline bitset_container_t *container_to_bitset(
return container_names[3];
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return "unknown";
}
}*/
-static inline const char *get_full_container_name(
- const container_t *c, uint8_t typecode
-){
+static inline const char *get_full_container_name(const container_t *c,
+ uint8_t typecode) {
switch (typecode) {
case BITSET_CONTAINER_TYPE:
return container_names[0];
@@ -4504,25 +4178,24 @@ static inline const char *get_full_container_name(
return shared_container_names[2];
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return "unknown";
}
break;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return "unknown";
}
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
/**
* Get the container cardinality (number of elements), requires a typecode
*/
-static inline int container_get_cardinality(
- const container_t *c, uint8_t typecode
-){
+static inline int container_get_cardinality(const container_t *c,
+ uint8_t typecode) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4533,12 +4206,10 @@ static inline int container_get_cardinality(
return run_container_cardinality(const_CAST_run(c));
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
-
-
// returns true if a container is known to be full. Note that a lazy bitset
// container
// might be full without us knowing
@@ -4546,22 +4217,20 @@ static inline bool container_is_full(const container_t *c, uint8_t typecode) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
- return bitset_container_cardinality(
- const_CAST_bitset(c)) == (1 << 16);
+ return bitset_container_cardinality(const_CAST_bitset(c)) ==
+ (1 << 16);
case ARRAY_CONTAINER_TYPE:
- return array_container_cardinality(
- const_CAST_array(c)) == (1 << 16);
+ return array_container_cardinality(const_CAST_array(c)) ==
+ (1 << 16);
case RUN_CONTAINER_TYPE:
return run_container_is_full(const_CAST_run(c));
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
-static inline int container_shrink_to_fit(
- container_t *c, uint8_t type
-){
+static inline int container_shrink_to_fit(container_t *c, uint8_t type) {
c = container_mutable_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -4572,45 +4241,40 @@ static inline int container_shrink_to_fit(
return run_container_shrink_to_fit(CAST_run(c));
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
-
/**
* make a container with a run of ones
*/
/* initially always use a run container, even if an array might be
* marginally
* smaller */
-static inline container_t *container_range_of_ones(
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type
-){
+static inline container_t *container_range_of_ones(uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type) {
assert(range_end >= range_start);
- uint64_t cardinality = range_end - range_start + 1;
- if(cardinality <= 2) {
- *result_type = ARRAY_CONTAINER_TYPE;
- return array_container_create_range(range_start, range_end);
+ uint64_t cardinality = range_end - range_start + 1;
+ if (cardinality <= 2) {
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return array_container_create_range(range_start, range_end);
} else {
- *result_type = RUN_CONTAINER_TYPE;
- return run_container_create_range(range_start, range_end);
+ *result_type = RUN_CONTAINER_TYPE;
+ return run_container_create_range(range_start, range_end);
}
}
-
/* Create a container with all the values between in [min,max) at a
distance k*step from min. */
-static inline container_t *container_from_range(
- uint8_t *type, uint32_t min,
- uint32_t max, uint16_t step
-){
+static inline container_t *container_from_range(uint8_t *type, uint32_t min,
+ uint32_t max, uint16_t step) {
if (step == 0) return NULL; // being paranoid
if (step == 1) {
- return container_range_of_ones(min,max,type);
+ return container_range_of_ones(min, max, type);
// Note: the result is not always a run (need to check the cardinality)
//*type = RUN_CONTAINER_TYPE;
- //return run_container_create_range(min, max);
+ // return run_container_create_range(min, max);
}
int size = (max - min + step - 1) / step;
if (size <= DEFAULT_MAX_SIZE) { // array container
@@ -4631,9 +4295,8 @@ static inline container_t *container_from_range(
/**
* "repair" the container after lazy operations.
*/
-static inline container_t *container_repair_after_lazy(
- container_t *c, uint8_t *type
-){
+static inline container_t *container_repair_after_lazy(container_t *c,
+ uint8_t *type) {
c = get_writable_copy_if_shared(c, type); // !!! unnecessary cloning
container_t *result = NULL;
switch (*type) {
@@ -4646,17 +4309,18 @@ static inline container_t *container_repair_after_lazy(
*type = ARRAY_CONTAINER_TYPE;
return result;
}
- return c; }
+ return c;
+ }
case ARRAY_CONTAINER_TYPE:
return c; // nothing to do
case RUN_CONTAINER_TYPE:
- return convert_run_to_efficient_container_and_free(
- CAST_run(c), type);
+ return convert_run_to_efficient_container_and_free(CAST_run(c),
+ type);
case SHARED_CONTAINER_TYPE:
assert(false);
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
@@ -4668,10 +4332,8 @@ static inline container_t *container_repair_after_lazy(
* container_write(container, buf).
*
*/
-static inline int32_t container_write(
- const container_t *c, uint8_t typecode,
- char *buf
-){
+static inline int32_t container_write(const container_t *c, uint8_t typecode,
+ char *buf) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4682,7 +4344,7 @@ static inline int32_t container_write(
return run_container_write(const_CAST_run(c), buf);
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
@@ -4691,9 +4353,8 @@ static inline int32_t container_write(
* container_write), requires a
* typecode
*/
-static inline int32_t container_size_in_bytes(
- const container_t *c, uint8_t typecode
-){
+static inline int32_t container_size_in_bytes(const container_t *c,
+ uint8_t typecode) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4704,11 +4365,11 @@ static inline int32_t container_size_in_bytes(
return run_container_size_in_bytes(const_CAST_run(c));
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/**
* print the container (useful for debugging), requires a typecode
*/
@@ -4721,25 +4382,27 @@ void container_printf(const container_t *container, uint8_t typecode);
void container_printf_as_uint32_array(const container_t *container,
uint8_t typecode, uint32_t base);
#endif
+
+bool container_internal_validate(const container_t *container, uint8_t typecode,
+ const char **reason);
/**
* Checks whether a container is not empty, requires a typecode
*/
-static inline bool container_nonzero_cardinality(
- const container_t *c, uint8_t typecode
-){
+static inline bool container_nonzero_cardinality(const container_t *c,
+ uint8_t typecode) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
return bitset_container_const_nonzero_cardinality(
- const_CAST_bitset(c));
+ const_CAST_bitset(c));
case ARRAY_CONTAINER_TYPE:
return array_container_nonzero_cardinality(const_CAST_array(c));
case RUN_CONTAINER_TYPE:
return run_container_nonzero_cardinality(const_CAST_run(c));
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
@@ -4753,25 +4416,23 @@ void container_free(container_t *container, uint8_t typecode);
* "base" (most significant values)
* Returns number of ints added.
*/
-static inline int container_to_uint32_array(
- uint32_t *output,
- const container_t *c, uint8_t typecode,
- uint32_t base
-){
+static inline int container_to_uint32_array(uint32_t *output,
+ const container_t *c,
+ uint8_t typecode, uint32_t base) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
- return bitset_container_to_uint32_array(
- output, const_CAST_bitset(c), base);
+ return bitset_container_to_uint32_array(output,
+ const_CAST_bitset(c), base);
case ARRAY_CONTAINER_TYPE:
- return array_container_to_uint32_array(
- output, const_CAST_array(c), base);
+ return array_container_to_uint32_array(output, const_CAST_array(c),
+ base);
case RUN_CONTAINER_TYPE:
- return run_container_to_uint32_array(
- output, const_CAST_run(c), base);
+ return run_container_to_uint32_array(output, const_CAST_run(c),
+ base);
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0; // unreached
}
@@ -4784,8 +4445,7 @@ static inline int container_to_uint32_array(
static inline container_t *container_add(
container_t *c, uint16_t val,
uint8_t typecode, // !!! should be second argument?
- uint8_t *new_typecode
-){
+ uint8_t *new_typecode) {
c = get_writable_copy_if_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4798,7 +4458,7 @@ static inline container_t *container_add(
*new_typecode = ARRAY_CONTAINER_TYPE;
return ac;
} else {
- bitset_container_t* bitset = bitset_container_from_array(ac);
+ bitset_container_t *bitset = bitset_container_from_array(ac);
bitset_container_add(bitset, val);
*new_typecode = BITSET_CONTAINER_TYPE;
return bitset;
@@ -4811,7 +4471,7 @@ static inline container_t *container_add(
return c;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -4826,8 +4486,7 @@ static inline container_t *container_add(
static inline container_t *container_remove(
container_t *c, uint16_t val,
uint8_t typecode, // !!! should be second argument?
- uint8_t *new_typecode
-){
+ uint8_t *new_typecode) {
c = get_writable_copy_if_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4851,7 +4510,7 @@ static inline container_t *container_remove(
return c;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -4860,10 +4519,9 @@ static inline container_t *container_remove(
* Check whether a value is in a container, requires a typecode
*/
static inline bool container_contains(
- const container_t *c,
- uint16_t val,
+ const container_t *c, uint16_t val,
uint8_t typecode // !!! should be second argument?
-){
+) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
@@ -4874,34 +4532,33 @@ static inline bool container_contains(
return run_container_contains(const_CAST_run(c), val);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
}
/**
- * Check whether a range of values from range_start (included) to range_end (excluded)
- * is in a container, requires a typecode
+ * Check whether a range of values from range_start (included) to range_end
+ * (excluded) is in a container, requires a typecode
*/
static inline bool container_contains_range(
- const container_t *c,
- uint32_t range_start, uint32_t range_end,
+ const container_t *c, uint32_t range_start, uint32_t range_end,
uint8_t typecode // !!! should be second argument?
-){
+) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
- return bitset_container_get_range(const_CAST_bitset(c),
- range_start, range_end);
+ return bitset_container_get_range(const_CAST_bitset(c), range_start,
+ range_end);
case ARRAY_CONTAINER_TYPE:
return array_container_contains_range(const_CAST_array(c),
- range_start, range_end);
+ range_start, range_end);
case RUN_CONTAINER_TYPE:
- return run_container_contains_range(const_CAST_run(c),
- range_start, range_end);
+ return run_container_contains_range(const_CAST_run(c), range_start,
+ range_end);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
}
@@ -4910,54 +4567,51 @@ static inline bool container_contains_range(
* Returns true if the two containers have the same content. Note that
* two containers having different types can be "equal" in this sense.
*/
-static inline bool container_equals(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2
-){
+static inline bool container_equals(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
return bitset_container_equals(const_CAST_bitset(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
return run_container_equals_bitset(const_CAST_run(c2),
const_CAST_bitset(c1));
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
return run_container_equals_bitset(const_CAST_run(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
// java would always return false?
return array_container_equal_bitset(const_CAST_array(c2),
const_CAST_bitset(c1));
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
// java would always return false?
return array_container_equal_bitset(const_CAST_array(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
return run_container_equals_array(const_CAST_run(c2),
const_CAST_array(c1));
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
return run_container_equals_array(const_CAST_run(c1),
const_CAST_array(c2));
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
return array_container_equals(const_CAST_array(c1),
const_CAST_array(c2));
- case CONTAINER_PAIR(RUN,RUN):
- return run_container_equals(const_CAST_run(c1),
- const_CAST_run(c2));
+ case CONTAINER_PAIR(RUN, RUN):
+ return run_container_equals(const_CAST_run(c1), const_CAST_run(c2));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
}
@@ -4966,51 +4620,49 @@ static inline bool container_equals(
* Returns true if the container c1 is a subset of the container c2. Note that
* c1 can be a subset of c2 even if they have a different type.
*/
-static inline bool container_is_subset(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2
-){
+static inline bool container_is_subset(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
return bitset_container_is_subset(const_CAST_bitset(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
return bitset_container_is_subset_run(const_CAST_bitset(c1),
const_CAST_run(c2));
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
return run_container_is_subset_bitset(const_CAST_run(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
return false; // by construction, size(c1) > size(c2)
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
return array_container_is_subset_bitset(const_CAST_array(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
return array_container_is_subset_run(const_CAST_array(c1),
const_CAST_run(c2));
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
return run_container_is_subset_array(const_CAST_run(c1),
const_CAST_array(c2));
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
return array_container_is_subset(const_CAST_array(c1),
const_CAST_array(c2));
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
return run_container_is_subset(const_CAST_run(c1),
const_CAST_run(c2));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
}
@@ -5022,40 +4674,36 @@ static inline bool container_is_subset(
* type result_type), requires a typecode. This allocates new memory, caller
* is responsible for deallocation.
*/
-static inline container_t *container_and(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_and(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- *result_type = bitset_bitset_container_intersection(
- const_CAST_bitset(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, BITSET):
+ *result_type =
+ bitset_bitset_container_intersection(
+ const_CAST_bitset(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
result = array_container_create();
- array_container_intersection(const_CAST_array(c1),
- const_CAST_array(c2),
- CAST_array(result));
+ array_container_intersection(
+ const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
result = run_container_create();
- run_container_intersection(const_CAST_run(c1),
- const_CAST_run(c2),
+ run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
CAST_run(result));
- return convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ return convert_run_to_efficient_container_and_free(CAST_run(result),
+ result_type);
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
result = array_container_create();
array_bitset_container_intersection(const_CAST_array(c2),
const_CAST_bitset(c1),
@@ -5063,7 +4711,7 @@ static inline container_t *container_and(
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
array_bitset_container_intersection(const_CAST_array(c1),
@@ -5071,41 +4719,39 @@ static inline container_t *container_and(
CAST_array(result));
return result;
- case CONTAINER_PAIR(BITSET,RUN):
- *result_type = run_bitset_container_intersection(
- const_CAST_run(c2),
- const_CAST_bitset(c1), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, RUN):
+ *result_type =
+ run_bitset_container_intersection(
+ const_CAST_run(c2), const_CAST_bitset(c1), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
- *result_type = run_bitset_container_intersection(
- const_CAST_run(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(RUN, BITSET):
+ *result_type =
+ run_bitset_container_intersection(
+ const_CAST_run(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
- array_run_container_intersection(const_CAST_array(c1),
- const_CAST_run(c2),
- CAST_array(result));
+ array_run_container_intersection(
+ const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
- array_run_container_intersection(const_CAST_array(c2),
- const_CAST_run(c1),
- CAST_array(result));
+ array_run_container_intersection(
+ const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -5113,52 +4759,52 @@ static inline container_t *container_and(
/**
* Compute the size of the intersection between two containers.
*/
-static inline int container_and_cardinality(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2
-){
+static inline int container_and_cardinality(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- return bitset_container_and_justcard(
- const_CAST_bitset(c1), const_CAST_bitset(c2));
+ case CONTAINER_PAIR(BITSET, BITSET):
+ return bitset_container_and_justcard(const_CAST_bitset(c1),
+ const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
return array_container_intersection_cardinality(
const_CAST_array(c1), const_CAST_array(c2));
- case CONTAINER_PAIR(RUN,RUN):
- return run_container_intersection_cardinality(
- const_CAST_run(c1), const_CAST_run(c2));
+ case CONTAINER_PAIR(RUN, RUN):
+ return run_container_intersection_cardinality(const_CAST_run(c1),
+ const_CAST_run(c2));
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
return array_bitset_container_intersection_cardinality(
const_CAST_array(c2), const_CAST_bitset(c1));
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
return array_bitset_container_intersection_cardinality(
const_CAST_array(c1), const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
return run_bitset_container_intersection_cardinality(
const_CAST_run(c2), const_CAST_bitset(c1));
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
return run_bitset_container_intersection_cardinality(
const_CAST_run(c1), const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
return array_run_container_intersection_cardinality(
const_CAST_array(c1), const_CAST_run(c2));
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
return array_run_container_intersection_cardinality(
const_CAST_array(c2), const_CAST_run(c1));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0;
}
}
@@ -5166,52 +4812,50 @@ static inline int container_and_cardinality(
/**
* Check whether two containers intersect.
*/
-static inline bool container_intersect(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2
-){
+static inline bool container_intersect(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
return bitset_container_intersect(const_CAST_bitset(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
return array_container_intersect(const_CAST_array(c1),
const_CAST_array(c2));
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
return run_container_intersect(const_CAST_run(c1),
const_CAST_run(c2));
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
return array_bitset_container_intersect(const_CAST_array(c2),
const_CAST_bitset(c1));
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
return array_bitset_container_intersect(const_CAST_array(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
return run_bitset_container_intersect(const_CAST_run(c2),
const_CAST_bitset(c1));
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
return run_bitset_container_intersect(const_CAST_run(c1),
const_CAST_bitset(c2));
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
return array_run_container_intersect(const_CAST_array(c1),
const_CAST_run(c2));
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
return array_run_container_intersect(const_CAST_array(c2),
const_CAST_run(c1));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return 0;
}
}
@@ -5225,40 +4869,36 @@ static inline bool container_intersect(
The type of the first container may change. Returns the modified
(and possibly new) container.
*/
-static inline container_t *container_iand(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_iand(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = get_writable_copy_if_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- *result_type =
- bitset_bitset_container_intersection_inplace(
- CAST_bitset(c1), const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, BITSET):
+ *result_type = bitset_bitset_container_intersection_inplace(
+ CAST_bitset(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
array_container_intersection_inplace(CAST_array(c1),
const_CAST_array(c2));
*result_type = ARRAY_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
result = run_container_create();
- run_container_intersection(const_CAST_run(c1),
- const_CAST_run(c2),
+ run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
CAST_run(result));
// as of January 2016, Java code used non-in-place intersection for
// two runcontainers
- return convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ return convert_run_to_efficient_container_and_free(CAST_run(result),
+ result_type);
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
// c1 is a bitmap so no inplace possible
result = array_container_create();
array_bitset_container_intersection(const_CAST_array(c2),
@@ -5267,49 +4907,46 @@ static inline container_t *container_iand(
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
array_bitset_container_intersection(
- const_CAST_array(c1), const_CAST_bitset(c2),
- CAST_array(c1)); // result is allowed to be same as c1
+ const_CAST_array(c1), const_CAST_bitset(c2),
+ CAST_array(c1)); // result is allowed to be same as c1
return c1;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
// will attempt in-place computation
*result_type = run_bitset_container_intersection(
- const_CAST_run(c2),
- const_CAST_bitset(c1), &c1)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ const_CAST_run(c2), const_CAST_bitset(c1), &c1)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(RUN,BITSET):
- *result_type = run_bitset_container_intersection(
- const_CAST_run(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(RUN, BITSET):
+ *result_type =
+ run_bitset_container_intersection(
+ const_CAST_run(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
- array_run_container_intersection(const_CAST_array(c1),
- const_CAST_run(c2),
- CAST_array(result));
+ array_run_container_intersection(
+ const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE; // never bitset
- array_run_container_intersection(const_CAST_array(c2),
- const_CAST_run(c1),
- CAST_array(result));
+ array_run_container_intersection(
+ const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -5319,43 +4956,39 @@ static inline container_t *container_iand(
* result_type), requires a typecode. This allocates new memory, caller
* is responsible for deallocation.
*/
-static inline container_t *container_or(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_or(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
result = bitset_container_create();
- bitset_container_or(const_CAST_bitset(c1),
- const_CAST_bitset(c2),
+ bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
- *result_type = array_array_container_union(
- const_CAST_array(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(ARRAY, ARRAY):
+ *result_type =
+ array_array_container_union(const_CAST_array(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
result = run_container_create();
- run_container_union(const_CAST_run(c1),
- const_CAST_run(c2),
+ run_container_union(const_CAST_run(c1), const_CAST_run(c2),
CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// todo: could be optimized since will never convert to array
result = convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ CAST_run(result), result_type);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
result = bitset_container_create();
array_bitset_container_union(const_CAST_array(c2),
const_CAST_bitset(c1),
@@ -5363,7 +4996,7 @@ static inline container_t *container_or(
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
result = bitset_container_create();
array_bitset_container_union(const_CAST_array(c1),
const_CAST_bitset(c2),
@@ -5371,57 +5004,51 @@ static inline container_t *container_or(
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
- run_container_copy(const_CAST_run(c2),
- CAST_run(result));
+ run_container_copy(const_CAST_run(c2), CAST_run(result));
return result;
}
result = bitset_container_create();
- run_bitset_container_union(const_CAST_run(c2),
- const_CAST_bitset(c1),
- CAST_bitset(result));
+ run_bitset_container_union(
+ const_CAST_run(c2), const_CAST_bitset(c1), CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
if (run_container_is_full(const_CAST_run(c1))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
- run_container_copy(const_CAST_run(c1),
- CAST_run(result));
+ run_container_copy(const_CAST_run(c1), CAST_run(result));
return result;
}
result = bitset_container_create();
- run_bitset_container_union(const_CAST_run(c1),
- const_CAST_bitset(c2),
- CAST_bitset(result));
+ run_bitset_container_union(
+ const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = run_container_create();
- array_run_container_union(const_CAST_array(c1),
- const_CAST_run(c2),
+ array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
CAST_run(result));
result = convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ CAST_run(result), result_type);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
result = run_container_create();
- array_run_container_union(const_CAST_array(c2),
- const_CAST_run(c1),
+ array_run_container_union(const_CAST_array(c2), const_CAST_run(c1),
CAST_run(result));
result = convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ CAST_run(result), result_type);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL; // unreached
}
}
@@ -5434,35 +5061,34 @@ static inline container_t *container_or(
* This lazy version delays some operations such as the maintenance of the
* cardinality. It requires repair later on the generated containers.
*/
-static inline container_t *container_lazy_or(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_lazy_or(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
result = bitset_container_create();
- bitset_container_or_nocard(
- const_CAST_bitset(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // is lazy
+ bitset_container_or_nocard(const_CAST_bitset(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
- *result_type = array_array_container_lazy_union(
- const_CAST_array(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(ARRAY, ARRAY):
+ *result_type =
+ array_array_container_lazy_union(const_CAST_array(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
result = run_container_create();
- run_container_union(const_CAST_run(c1),
- const_CAST_run(c2),
+ run_container_union(const_CAST_run(c1), const_CAST_run(c2),
CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// we are being lazy
@@ -5470,23 +5096,23 @@ static inline container_t *container_lazy_or(
CAST_run(result), result_type);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
result = bitset_container_create();
- array_bitset_container_lazy_union(
- const_CAST_array(c2), const_CAST_bitset(c1),
- CAST_bitset(result)); // is lazy
+ array_bitset_container_lazy_union(const_CAST_array(c2),
+ const_CAST_bitset(c1),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
result = bitset_container_create();
- array_bitset_container_lazy_union(
- const_CAST_array(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // is lazy
+ array_bitset_container_lazy_union(const_CAST_array(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
@@ -5494,13 +5120,13 @@ static inline container_t *container_lazy_or(
return result;
}
result = bitset_container_create();
- run_bitset_container_lazy_union(
- const_CAST_run(c2), const_CAST_bitset(c1),
- CAST_bitset(result)); // is lazy
+ run_bitset_container_lazy_union(const_CAST_run(c2),
+ const_CAST_bitset(c1),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
if (run_container_is_full(const_CAST_run(c1))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
@@ -5508,27 +5134,25 @@ static inline container_t *container_lazy_or(
return result;
}
result = bitset_container_create();
- run_bitset_container_lazy_union(
- const_CAST_run(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // is lazy
+ run_bitset_container_lazy_union(const_CAST_run(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = run_container_create();
- array_run_container_union(const_CAST_array(c1),
- const_CAST_run(c2),
+ array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// next line skipped since we are lazy
// result = convert_run_to_efficient_container(result, result_type);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
result = run_container_create();
- array_run_container_union(
- const_CAST_array(c2), const_CAST_run(c1),
- CAST_run(result)); // TODO make lazy
+ array_run_container_union(const_CAST_array(c2), const_CAST_run(c1),
+ CAST_run(result)); // TODO make lazy
*result_type = RUN_CONTAINER_TYPE;
// next line skipped since we are lazy
// result = convert_run_to_efficient_container(result, result_type);
@@ -5536,7 +5160,7 @@ static inline container_t *container_lazy_or(
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL; // unreached
}
}
@@ -5549,19 +5173,16 @@ static inline container_t *container_lazy_or(
* created and the caller is responsible for freeing it.
* The type of the first container may change. Returns the modified
* (and possibly new) container
-*/
-static inline container_t *container_ior(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+ */
+static inline container_t *container_ior(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = get_writable_copy_if_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- bitset_container_or(const_CAST_bitset(c1),
- const_CAST_bitset(c2),
+ case CONTAINER_PAIR(BITSET, BITSET):
+ bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
CAST_bitset(c1));
#ifdef OR_BITSET_CONVERSION_TO_FULL
if (CAST_bitset(c1)->cardinality == (1 << 16)) { // we convert
@@ -5573,30 +5194,28 @@ static inline container_t *container_ior(
*result_type = BITSET_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
*result_type = array_array_container_inplace_union(
- CAST_array(c1), const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
- if((result == NULL)
- && (*result_type == ARRAY_CONTAINER_TYPE)) {
- return c1; // the computation was done in-place!
+ CAST_array(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ if ((result == NULL) && (*result_type == ARRAY_CONTAINER_TYPE)) {
+ return c1; // the computation was done in-place!
}
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
run_container_union_inplace(CAST_run(c1), const_CAST_run(c2));
return convert_run_to_efficient_container(CAST_run(c1),
result_type);
- case CONTAINER_PAIR(BITSET,ARRAY):
- array_bitset_container_union(const_CAST_array(c2),
- const_CAST_bitset(c1),
- CAST_bitset(c1));
+ case CONTAINER_PAIR(BITSET, ARRAY):
+ array_bitset_container_union(
+ const_CAST_array(c2), const_CAST_bitset(c1), CAST_bitset(c1));
*result_type = BITSET_CONTAINER_TYPE; // never array
return c1;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
// c1 is an array, so no in-place possible
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
@@ -5605,7 +5224,7 @@ static inline container_t *container_ior(
CAST_bitset(result));
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
@@ -5618,37 +5237,34 @@ static inline container_t *container_ior(
*result_type = BITSET_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
if (run_container_is_full(const_CAST_run(c1))) {
*result_type = RUN_CONTAINER_TYPE;
return c1;
}
result = bitset_container_create();
- run_bitset_container_union(const_CAST_run(c1),
- const_CAST_bitset(c2),
- CAST_bitset(result));
+ run_bitset_container_union(
+ const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = run_container_create();
- array_run_container_union(const_CAST_array(c1),
- const_CAST_run(c2),
+ array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
CAST_run(result));
result = convert_run_to_efficient_container_and_free(
- CAST_run(result), result_type);
+ CAST_run(result), result_type);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
array_run_container_inplace_union(const_CAST_array(c2),
CAST_run(c1));
- c1 = convert_run_to_efficient_container(CAST_run(c1),
- result_type);
+ c1 = convert_run_to_efficient_container(CAST_run(c1), result_type);
return c1;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -5664,22 +5280,20 @@ static inline container_t *container_ior(
*
* This lazy version delays some operations such as the maintenance of the
* cardinality. It requires repair later on the generated containers.
-*/
-static inline container_t *container_lazy_ior(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+ */
+static inline container_t *container_lazy_ior(container_t *c1, uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
assert(type1 != SHARED_CONTAINER_TYPE);
// c1 = get_writable_copy_if_shared(c1,&type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL
// if we have two bitsets, we might as well compute the cardinality
- bitset_container_or(const_CAST_bitset(c1),
- const_CAST_bitset(c2),
+ bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
CAST_bitset(c1));
// it is possible that two bitsets can lead to a full container
if (CAST_bitset(c1)->cardinality == (1 << 16)) { // we convert
@@ -5689,54 +5303,49 @@ static inline container_t *container_lazy_ior(
}
#else
bitset_container_or_nocard(const_CAST_bitset(c1),
- const_CAST_bitset(c2),
- CAST_bitset(c1));
+ const_CAST_bitset(c2), CAST_bitset(c1));
#endif
*result_type = BITSET_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
*result_type = array_array_container_lazy_inplace_union(
- CAST_array(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
- if((result == NULL)
- && (*result_type == ARRAY_CONTAINER_TYPE)) {
- return c1; // the computation was done in-place!
+ CAST_array(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ if ((result == NULL) && (*result_type == ARRAY_CONTAINER_TYPE)) {
+ return c1; // the computation was done in-place!
}
return result;
- case CONTAINER_PAIR(RUN,RUN):
- run_container_union_inplace(CAST_run(c1),
- const_CAST_run(c2));
+ case CONTAINER_PAIR(RUN, RUN):
+ run_container_union_inplace(CAST_run(c1), const_CAST_run(c2));
*result_type = RUN_CONTAINER_TYPE;
return convert_run_to_efficient_container(CAST_run(c1),
result_type);
- case CONTAINER_PAIR(BITSET,ARRAY):
- array_bitset_container_lazy_union(
- const_CAST_array(c2), const_CAST_bitset(c1),
- CAST_bitset(c1)); // is lazy
- *result_type = BITSET_CONTAINER_TYPE; // never array
+ case CONTAINER_PAIR(BITSET, ARRAY):
+ array_bitset_container_lazy_union(const_CAST_array(c2),
+ const_CAST_bitset(c1),
+ CAST_bitset(c1)); // is lazy
+ *result_type = BITSET_CONTAINER_TYPE; // never array
return c1;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
// c1 is an array, so no in-place possible
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
- array_bitset_container_lazy_union(
- const_CAST_array(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // is lazy
+ array_bitset_container_lazy_union(const_CAST_array(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = run_container_create();
*result_type = RUN_CONTAINER_TYPE;
- run_container_copy(const_CAST_run(c2),
- CAST_run(result));
+ run_container_copy(const_CAST_run(c2), CAST_run(result));
return result;
}
run_bitset_container_lazy_union(
@@ -5745,22 +5354,21 @@ static inline container_t *container_lazy_ior(
*result_type = BITSET_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
if (run_container_is_full(const_CAST_run(c1))) {
*result_type = RUN_CONTAINER_TYPE;
return c1;
}
result = bitset_container_create();
- run_bitset_container_lazy_union(
- const_CAST_run(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // lazy
+ run_bitset_container_lazy_union(const_CAST_run(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = run_container_create();
- array_run_container_union(const_CAST_array(c1),
- const_CAST_run(c2),
+ array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// next line skipped since we are lazy
@@ -5768,7 +5376,7 @@ static inline container_t *container_lazy_ior(
// result_type);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
array_run_container_inplace_union(const_CAST_array(c2),
CAST_run(c1));
*result_type = RUN_CONTAINER_TYPE;
@@ -5779,7 +5387,7 @@ static inline container_t *container_lazy_ior(
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -5789,98 +5397,92 @@ static inline container_t *container_lazy_ior(
* container (having type result_type), requires a typecode. This allocates new
* memory, caller is responsible for deallocation.
*/
-static inline container_t* container_xor(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_xor(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- *result_type = bitset_bitset_container_xor(
- const_CAST_bitset(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
- return result;
-
- case CONTAINER_PAIR(ARRAY,ARRAY):
- *result_type = array_array_container_xor(
- const_CAST_array(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, BITSET):
+ *result_type =
+ bitset_bitset_container_xor(const_CAST_bitset(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
*result_type =
- run_run_container_xor(const_CAST_run(c1),
- const_CAST_run(c2), &result);
+ array_array_container_xor(const_CAST_array(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
- *result_type = array_bitset_container_xor(
- const_CAST_array(c2),
- const_CAST_bitset(c1), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(RUN, RUN):
+ *result_type = (uint8_t)run_run_container_xor(
+ const_CAST_run(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
- *result_type = array_bitset_container_xor(
- const_CAST_array(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, ARRAY):
+ *result_type =
+ array_bitset_container_xor(const_CAST_array(c2),
+ const_CAST_bitset(c1), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,RUN):
- *result_type = run_bitset_container_xor(
- const_CAST_run(c2),
- const_CAST_bitset(c1), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(ARRAY, BITSET):
+ *result_type =
+ array_bitset_container_xor(const_CAST_array(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
- *result_type = run_bitset_container_xor(
- const_CAST_run(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, RUN):
+ *result_type =
+ run_bitset_container_xor(const_CAST_run(c2),
+ const_CAST_bitset(c1), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(RUN, BITSET):
*result_type =
- array_run_container_xor(const_CAST_array(c1),
- const_CAST_run(c2), &result);
+ run_bitset_container_xor(const_CAST_run(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
- *result_type =
- array_run_container_xor(const_CAST_array(c2),
- const_CAST_run(c1), &result);
+ case CONTAINER_PAIR(ARRAY, RUN):
+ *result_type = (uint8_t)array_run_container_xor(
+ const_CAST_array(c1), const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(RUN, ARRAY):
+ *result_type = (uint8_t)array_run_container_xor(
+ const_CAST_array(c2), const_CAST_run(c1), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL; // unreached
}
}
/* Applies an offset to the non-empty container 'c'.
* The results are stored in new containers returned via 'lo' and 'hi', for the
- * low and high halves of the result (where the low half matches the original key
- * and the high one corresponds to values for the following key).
- * Either one of 'lo' and 'hi' are allowed to be 'NULL', but not both.
- * Whenever one of them is not 'NULL', it should point to a 'NULL' container.
- * Whenever one of them is 'NULL' the shifted elements for that part will not be
- * computed.
- * If either of the resulting containers turns out to be empty, the pointed
- * container will remain 'NULL'.
+ * low and high halves of the result (where the low half matches the original
+ * key and the high one corresponds to values for the following key). Either one
+ * of 'lo' and 'hi' are allowed to be 'NULL', but not both. Whenever one of them
+ * is not 'NULL', it should point to a 'NULL' container. Whenever one of them is
+ * 'NULL' the shifted elements for that part will not be computed. If either of
+ * the resulting containers turns out to be empty, the pointed container will
+ * remain 'NULL'.
*/
static inline void container_add_offset(const container_t *c, uint8_t type,
container_t **lo, container_t **hi,
@@ -5892,19 +5494,19 @@ static inline void container_add_offset(const container_t *c, uint8_t type,
assert(hi == NULL || *hi == NULL);
switch (type) {
- case BITSET_CONTAINER_TYPE:
- bitset_container_offset(const_CAST_bitset(c), lo, hi, offset);
- break;
- case ARRAY_CONTAINER_TYPE:
- array_container_offset(const_CAST_array(c), lo, hi, offset);
- break;
- case RUN_CONTAINER_TYPE:
- run_container_offset(const_CAST_run(c), lo, hi, offset);
- break;
- default:
- assert(false);
- __builtin_unreachable();
- break;
+ case BITSET_CONTAINER_TYPE:
+ bitset_container_offset(const_CAST_bitset(c), lo, hi, offset);
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ array_container_offset(const_CAST_array(c), lo, hi, offset);
+ break;
+ case RUN_CONTAINER_TYPE:
+ run_container_offset(const_CAST_run(c), lo, hi, offset);
+ break;
+ default:
+ assert(false);
+ roaring_unreachable;
+ break;
}
}
@@ -5916,39 +5518,38 @@ static inline void container_add_offset(const container_t *c, uint8_t type,
* This lazy version delays some operations such as the maintenance of the
* cardinality. It requires repair later on the generated containers.
*/
-static inline container_t *container_lazy_xor(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_lazy_xor(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
result = bitset_container_create();
- bitset_container_xor_nocard(
- const_CAST_bitset(c1), const_CAST_bitset(c2),
- CAST_bitset(result)); // is lazy
+ bitset_container_xor_nocard(const_CAST_bitset(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
- *result_type = array_array_container_lazy_xor(
- const_CAST_array(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(ARRAY, ARRAY):
+ *result_type =
+ array_array_container_lazy_xor(const_CAST_array(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
// nothing special done yet.
- *result_type =
- run_run_container_xor(const_CAST_run(c1),
- const_CAST_run(c2), &result);
+ *result_type = (uint8_t)run_run_container_xor(
+ const_CAST_run(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
array_bitset_container_lazy_xor(const_CAST_array(c2),
@@ -5956,7 +5557,7 @@ static inline container_t *container_lazy_xor(
CAST_bitset(result));
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
array_bitset_container_lazy_xor(const_CAST_array(c1),
@@ -5964,37 +5565,33 @@ static inline container_t *container_lazy_xor(
CAST_bitset(result));
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
result = bitset_container_create();
- run_bitset_container_lazy_xor(const_CAST_run(c2),
- const_CAST_bitset(c1),
- CAST_bitset(result));
+ run_bitset_container_lazy_xor(
+ const_CAST_run(c2), const_CAST_bitset(c1), CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
result = bitset_container_create();
- run_bitset_container_lazy_xor(const_CAST_run(c1),
- const_CAST_bitset(c2),
- CAST_bitset(result));
+ run_bitset_container_lazy_xor(
+ const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
*result_type = BITSET_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
result = run_container_create();
array_run_container_lazy_xor(const_CAST_array(c1),
- const_CAST_run(c2),
- CAST_run(result));
+ const_CAST_run(c2), CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// next line skipped since we are lazy
// result = convert_run_to_efficient_container(result, result_type);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
+ case CONTAINER_PAIR(RUN, ARRAY):
result = run_container_create();
array_run_container_lazy_xor(const_CAST_array(c2),
- const_CAST_run(c1),
- CAST_run(result));
+ const_CAST_run(c1), CAST_run(result));
*result_type = RUN_CONTAINER_TYPE;
// next line skipped since we are lazy
// result = convert_run_to_efficient_container(result, result_type);
@@ -6002,7 +5599,7 @@ static inline container_t *container_lazy_xor(
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL; // unreached
}
}
@@ -6012,81 +5609,78 @@ static inline container_t *container_lazy_xor(
* If the returned pointer is identical to c1, then the container has been
* modified.
* If the returned pointer is different from c1, then a new container has been
- * created and the caller is responsible for freeing it.
- * The type of the first container may change. Returns the modified
- * (and possibly new) container
-*/
-static inline container_t *container_ixor(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+ * created. The original container is freed by container_ixor.
+ * The type of the first container may change. Returns the modified (and
+ * possibly new) container.
+ */
+static inline container_t *container_ixor(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type) {
c1 = get_writable_copy_if_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
*result_type = bitset_bitset_container_ixor(
- CAST_bitset(c1), const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_bitset(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
*result_type = array_array_container_ixor(
- CAST_array(c1), const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_array(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
- *result_type = run_run_container_ixor(
+ case CONTAINER_PAIR(RUN, RUN):
+ *result_type = (uint8_t)run_run_container_ixor(
CAST_run(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
*result_type = bitset_array_container_ixor(
- CAST_bitset(c1), const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_bitset(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
*result_type = array_bitset_container_ixor(
- CAST_array(c1), const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_array(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,RUN):
- *result_type =
- bitset_run_container_ixor(
- CAST_bitset(c1), const_CAST_run(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, RUN):
+ *result_type = bitset_run_container_ixor(
+ CAST_bitset(c1), const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
*result_type = run_bitset_container_ixor(
- CAST_run(c1), const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_run(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
- *result_type = array_run_container_ixor(
- CAST_array(c1), const_CAST_run(c2), &result);
+ case CONTAINER_PAIR(ARRAY, RUN):
+ *result_type = (uint8_t)array_run_container_ixor(
+ CAST_array(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
- *result_type = run_array_container_ixor(
- CAST_run(c1), const_CAST_array(c2), &result);
+ case CONTAINER_PAIR(RUN, ARRAY):
+ *result_type = (uint8_t)run_array_container_ixor(
+ CAST_run(c1), const_CAST_array(c2), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -6102,19 +5696,17 @@ static inline container_t *container_ixor(
*
* This lazy version delays some operations such as the maintenance of the
* cardinality. It requires repair later on the generated containers.
-*/
-static inline container_t *container_lazy_ixor(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+ */
+static inline container_t *container_lazy_ixor(container_t *c1, uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
assert(type1 != SHARED_CONTAINER_TYPE);
// c1 = get_writable_copy_if_shared(c1,&type1);
c2 = container_unwrap_shared(c2, &type2);
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- bitset_container_xor_nocard(CAST_bitset(c1),
- const_CAST_bitset(c2),
+ case CONTAINER_PAIR(BITSET, BITSET):
+ bitset_container_xor_nocard(CAST_bitset(c1), const_CAST_bitset(c2),
CAST_bitset(c1)); // is lazy
*result_type = BITSET_CONTAINER_TYPE;
return c1;
@@ -6139,51 +5731,49 @@ static inline container_t *container_lazy_ixor(
* container (having type result_type), requires a typecode. This allocates new
* memory, caller is responsible for deallocation.
*/
-static inline container_t *container_andnot(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+static inline container_t *container_andnot(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
c1 = container_unwrap_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
- *result_type = bitset_bitset_container_andnot(
- const_CAST_bitset(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, BITSET):
+ *result_type =
+ bitset_bitset_container_andnot(const_CAST_bitset(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
+ case CONTAINER_PAIR(ARRAY, ARRAY):
result = array_container_create();
- array_array_container_andnot(const_CAST_array(c1),
- const_CAST_array(c2),
- CAST_array(result));
+ array_array_container_andnot(
+ const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
*result_type = ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,RUN):
+ case CONTAINER_PAIR(RUN, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE;
return result;
}
- *result_type =
- run_run_container_andnot(const_CAST_run(c1),
- const_CAST_run(c2), &result);
+ *result_type = (uint8_t)run_run_container_andnot(
+ const_CAST_run(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
- *result_type = bitset_array_container_andnot(
- const_CAST_bitset(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(BITSET, ARRAY):
+ *result_type =
+ bitset_array_container_andnot(const_CAST_bitset(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
result = array_container_create();
array_bitset_container_andnot(const_CAST_array(c1),
const_CAST_bitset(c2),
@@ -6191,49 +5781,47 @@ static inline container_t *container_andnot(
*result_type = ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE;
return result;
}
- *result_type = bitset_run_container_andnot(
- const_CAST_bitset(c1),
- const_CAST_run(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ *result_type =
+ bitset_run_container_andnot(const_CAST_bitset(c1),
+ const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
- *result_type = run_bitset_container_andnot(
- const_CAST_run(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ case CONTAINER_PAIR(RUN, BITSET):
+ *result_type =
+ run_bitset_container_andnot(const_CAST_run(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
if (run_container_is_full(const_CAST_run(c2))) {
result = array_container_create();
*result_type = ARRAY_CONTAINER_TYPE;
return result;
}
result = array_container_create();
- array_run_container_andnot(const_CAST_array(c1),
- const_CAST_run(c2),
+ array_run_container_andnot(const_CAST_array(c1), const_CAST_run(c2),
CAST_array(result));
*result_type = ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,ARRAY):
- *result_type = run_array_container_andnot(
- const_CAST_run(c1), const_CAST_array(c2),
- &result);
+ case CONTAINER_PAIR(RUN, ARRAY):
+ *result_type = (uint8_t)run_array_container_andnot(
+ const_CAST_run(c1), const_CAST_array(c2), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL; // unreached
}
}
@@ -6244,82 +5832,75 @@ static inline container_t *container_andnot(
* If the returned pointer is identical to c1, then the container has been
* modified.
* If the returned pointer is different from c1, then a new container has been
- * created and the caller is responsible for freeing it.
- * The type of the first container may change. Returns the modified
- * (and possibly new) container
-*/
-static inline container_t *container_iandnot(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type
-){
+ * created. The original container is freed by container_iandnot.
+ * The type of the first container may change. Returns the modified (and
+ * possibly new) container.
+ */
+static inline container_t *container_iandnot(container_t *c1, uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type) {
c1 = get_writable_copy_if_shared(c1, &type1);
c2 = container_unwrap_shared(c2, &type2);
container_t *result = NULL;
switch (PAIR_CONTAINER_TYPES(type1, type2)) {
- case CONTAINER_PAIR(BITSET,BITSET):
+ case CONTAINER_PAIR(BITSET, BITSET):
*result_type = bitset_bitset_container_iandnot(
- CAST_bitset(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_bitset(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,ARRAY):
- array_array_container_iandnot(CAST_array(c1),
- const_CAST_array(c2));
+ case CONTAINER_PAIR(ARRAY, ARRAY):
+ array_array_container_iandnot(CAST_array(c1), const_CAST_array(c2));
*result_type = ARRAY_CONTAINER_TYPE;
return c1;
- case CONTAINER_PAIR(RUN,RUN):
- *result_type = run_run_container_iandnot(
+ case CONTAINER_PAIR(RUN, RUN):
+ *result_type = (uint8_t)run_run_container_iandnot(
CAST_run(c1), const_CAST_run(c2), &result);
return result;
- case CONTAINER_PAIR(BITSET,ARRAY):
+ case CONTAINER_PAIR(BITSET, ARRAY):
*result_type = bitset_array_container_iandnot(
- CAST_bitset(c1),
- const_CAST_array(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_bitset(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,BITSET):
+ case CONTAINER_PAIR(ARRAY, BITSET):
*result_type = ARRAY_CONTAINER_TYPE;
array_bitset_container_iandnot(CAST_array(c1),
const_CAST_bitset(c2));
return c1;
- case CONTAINER_PAIR(BITSET,RUN):
+ case CONTAINER_PAIR(BITSET, RUN):
*result_type = bitset_run_container_iandnot(
- CAST_bitset(c1),
- const_CAST_run(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_bitset(c1), const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(RUN,BITSET):
+ case CONTAINER_PAIR(RUN, BITSET):
*result_type = run_bitset_container_iandnot(
- CAST_run(c1),
- const_CAST_bitset(c2), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ CAST_run(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
- case CONTAINER_PAIR(ARRAY,RUN):
+ case CONTAINER_PAIR(ARRAY, RUN):
*result_type = ARRAY_CONTAINER_TYPE;
- array_run_container_iandnot(CAST_array(c1),
- const_CAST_run(c2));
+ array_run_container_iandnot(CAST_array(c1), const_CAST_run(c2));
return c1;
- case CONTAINER_PAIR(RUN,ARRAY):
- *result_type = run_array_container_iandnot(
+ case CONTAINER_PAIR(RUN, ARRAY):
+ *result_type = (uint8_t)run_array_container_iandnot(
CAST_run(c1), const_CAST_array(c2), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
@@ -6329,37 +5910,33 @@ static inline container_t *container_iandnot(
* to iterator. You need to specify a container and its type.
* Returns true if the iteration should continue.
*/
-static inline bool container_iterate(
- const container_t *c, uint8_t type,
- uint32_t base,
- roaring_iterator iterator, void *ptr
-){
+static inline bool container_iterate(const container_t *c, uint8_t type,
+ uint32_t base, roaring_iterator iterator,
+ void *ptr) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
- return bitset_container_iterate(const_CAST_bitset(c),
- base, iterator, ptr);
+ return bitset_container_iterate(const_CAST_bitset(c), base,
+ iterator, ptr);
case ARRAY_CONTAINER_TYPE:
- return array_container_iterate(const_CAST_array(c),
- base, iterator, ptr);
+ return array_container_iterate(const_CAST_array(c), base, iterator,
+ ptr);
case RUN_CONTAINER_TYPE:
- return run_container_iterate(const_CAST_run(c),
- base, iterator, ptr);
+ return run_container_iterate(const_CAST_run(c), base, iterator,
+ ptr);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
-static inline bool container_iterate64(
- const container_t *c, uint8_t type,
- uint32_t base,
- roaring_iterator64 iterator,
- uint64_t high_bits, void *ptr
-){
+static inline bool container_iterate64(const container_t *c, uint8_t type,
+ uint32_t base,
+ roaring_iterator64 iterator,
+ uint64_t high_bits, void *ptr) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -6369,154 +5946,145 @@ static inline bool container_iterate64(
return array_container_iterate64(const_CAST_array(c), base,
iterator, high_bits, ptr);
case RUN_CONTAINER_TYPE:
- return run_container_iterate64(const_CAST_run(c), base,
- iterator, high_bits, ptr);
+ return run_container_iterate64(const_CAST_run(c), base, iterator,
+ high_bits, ptr);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
-static inline container_t *container_not(
- const container_t *c, uint8_t type,
- uint8_t *result_type
-){
+static inline container_t *container_not(const container_t *c, uint8_t type,
+ uint8_t *result_type) {
c = container_unwrap_shared(c, &type);
container_t *result = NULL;
switch (type) {
case BITSET_CONTAINER_TYPE:
- *result_type = bitset_container_negation(
- const_CAST_bitset(c), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ *result_type =
+ bitset_container_negation(const_CAST_bitset(c), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case ARRAY_CONTAINER_TYPE:
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
- array_container_negation(const_CAST_array(c),
- CAST_bitset(result));
+ array_container_negation(const_CAST_array(c), CAST_bitset(result));
return result;
case RUN_CONTAINER_TYPE:
*result_type =
- run_container_negation(const_CAST_run(c), &result);
+ (uint8_t)run_container_negation(const_CAST_run(c), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
-static inline container_t *container_not_range(
- const container_t *c, uint8_t type,
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type
-){
+static inline container_t *container_not_range(const container_t *c,
+ uint8_t type,
+ uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type) {
c = container_unwrap_shared(c, &type);
container_t *result = NULL;
switch (type) {
case BITSET_CONTAINER_TYPE:
*result_type =
- bitset_container_negation_range(
- const_CAST_bitset(c), range_start, range_end, &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ bitset_container_negation_range(const_CAST_bitset(c),
+ range_start, range_end, &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case ARRAY_CONTAINER_TYPE:
*result_type =
- array_container_negation_range(
- const_CAST_array(c), range_start, range_end, &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ array_container_negation_range(const_CAST_array(c), range_start,
+ range_end, &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case RUN_CONTAINER_TYPE:
- *result_type = run_container_negation_range(
- const_CAST_run(c), range_start, range_end, &result);
+ *result_type = (uint8_t)run_container_negation_range(
+ const_CAST_run(c), range_start, range_end, &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
-static inline container_t *container_inot(
- container_t *c, uint8_t type,
- uint8_t *result_type
-){
+static inline container_t *container_inot(container_t *c, uint8_t type,
+ uint8_t *result_type) {
c = get_writable_copy_if_shared(c, &type);
container_t *result = NULL;
switch (type) {
case BITSET_CONTAINER_TYPE:
- *result_type = bitset_container_negation_inplace(
- CAST_bitset(c), &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ *result_type =
+ bitset_container_negation_inplace(CAST_bitset(c), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case ARRAY_CONTAINER_TYPE:
// will never be inplace
result = bitset_container_create();
*result_type = BITSET_CONTAINER_TYPE;
- array_container_negation(CAST_array(c),
- CAST_bitset(result));
+ array_container_negation(CAST_array(c), CAST_bitset(result));
array_container_free(CAST_array(c));
return result;
case RUN_CONTAINER_TYPE:
*result_type =
- run_container_negation_inplace(CAST_run(c), &result);
+ (uint8_t)run_container_negation_inplace(CAST_run(c), &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
-static inline container_t *container_inot_range(
- container_t *c, uint8_t type,
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type
-){
+static inline container_t *container_inot_range(container_t *c, uint8_t type,
+ uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type) {
c = get_writable_copy_if_shared(c, &type);
container_t *result = NULL;
switch (type) {
case BITSET_CONTAINER_TYPE:
- *result_type =
- bitset_container_negation_range_inplace(
- CAST_bitset(c), range_start, range_end, &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ *result_type = bitset_container_negation_range_inplace(
+ CAST_bitset(c), range_start, range_end, &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case ARRAY_CONTAINER_TYPE:
- *result_type =
- array_container_negation_range_inplace(
- CAST_array(c), range_start, range_end, &result)
- ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ *result_type = array_container_negation_range_inplace(
+ CAST_array(c), range_start, range_end, &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
return result;
case RUN_CONTAINER_TYPE:
- *result_type = run_container_negation_range_inplace(
- CAST_run(c), range_start, range_end, &result);
+ *result_type = (uint8_t)run_container_negation_range_inplace(
+ CAST_run(c), range_start, range_end, &result);
return result;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
@@ -6528,34 +6096,30 @@ static inline container_t *container_inot_range(
* accordingly.
* Otherwise, it returns false and updates start_rank.
*/
-static inline bool container_select(
- const container_t *c, uint8_t type,
- uint32_t *start_rank, uint32_t rank,
- uint32_t *element
-){
+static inline bool container_select(const container_t *c, uint8_t type,
+ uint32_t *start_rank, uint32_t rank,
+ uint32_t *element) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
- return bitset_container_select(const_CAST_bitset(c),
- start_rank, rank, element);
+ return bitset_container_select(const_CAST_bitset(c), start_rank,
+ rank, element);
case ARRAY_CONTAINER_TYPE:
- return array_container_select(const_CAST_array(c),
- start_rank, rank, element);
+ return array_container_select(const_CAST_array(c), start_rank, rank,
+ element);
case RUN_CONTAINER_TYPE:
- return run_container_select(const_CAST_run(c),
- start_rank, rank, element);
+ return run_container_select(const_CAST_run(c), start_rank, rank,
+ element);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
-static inline uint16_t container_maximum(
- const container_t *c, uint8_t type
-){
+static inline uint16_t container_maximum(const container_t *c, uint8_t type) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -6566,16 +6130,14 @@ static inline uint16_t container_maximum(
return run_container_maximum(const_CAST_run(c));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
-static inline uint16_t container_minimum(
- const container_t *c, uint8_t type
-){
+static inline uint16_t container_minimum(const container_t *c, uint8_t type) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -6586,18 +6148,16 @@ static inline uint16_t container_minimum(
return run_container_minimum(const_CAST_run(c));
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
// number of values smaller or equal to x
-static inline int container_rank(
- const container_t *c, uint8_t type,
- uint16_t x
-){
+static inline int container_rank(const container_t *c, uint8_t type,
+ uint16_t x) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -6608,10 +6168,55 @@ static inline int container_rank(
return run_container_rank(const_CAST_run(c), x);
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
+ }
+ assert(false);
+ roaring_unreachable;
+ return false;
+}
+
+// bulk version of container_rank(); return number of consumed elements
+static inline uint32_t container_rank_many(const container_t *c, uint8_t type,
+ uint64_t start_rank,
+ const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans) {
+ c = container_unwrap_shared(c, &type);
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_rank_many(const_CAST_bitset(c), start_rank,
+ begin, end, ans);
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_rank_many(const_CAST_array(c), start_rank,
+ begin, end, ans);
+ case RUN_CONTAINER_TYPE:
+ return run_container_rank_many(const_CAST_run(c), start_rank, begin,
+ end, ans);
+ default:
+ assert(false);
+ roaring_unreachable;
+ }
+ assert(false);
+ roaring_unreachable;
+ return 0;
+}
+
+// return the index of x, or -1 if x does not exist
+static inline int container_get_index(const container_t *c, uint8_t type,
+ uint16_t x) {
+ c = container_unwrap_shared(c, &type);
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_get_index(const_CAST_bitset(c), x);
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_get_index(const_CAST_array(c), x);
+ case RUN_CONTAINER_TYPE:
+ return run_container_get_index(const_CAST_run(c), x);
+ default:
+ assert(false);
+ roaring_unreachable;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return false;
}
@@ -6623,11 +6228,9 @@ static inline int container_rank(
* The type of the first container may change. Returns the modified
* (and possibly new) container.
*/
-static inline container_t *container_add_range(
- container_t *c, uint8_t type,
- uint32_t min, uint32_t max,
- uint8_t *result_type
-){
+static inline container_t *container_add_range(container_t *c, uint8_t type,
+ uint32_t min, uint32_t max,
+ uint8_t *result_type) {
// NB: when selecting new container type, we perform only inexpensive checks
switch (type) {
case BITSET_CONTAINER_TYPE: {
@@ -6636,8 +6239,8 @@ static inline container_t *container_add_range(
int32_t union_cardinality = 0;
union_cardinality += bitset->cardinality;
union_cardinality += max - min + 1;
- union_cardinality -= bitset_lenrange_cardinality(bitset->words,
- min, max-min);
+ union_cardinality -=
+ bitset_lenrange_cardinality(bitset->words, min, max - min);
if (union_cardinality == INT32_C(0x10000)) {
*result_type = RUN_CONTAINER_TYPE;
@@ -6652,16 +6255,21 @@ static inline container_t *container_add_range(
case ARRAY_CONTAINER_TYPE: {
array_container_t *array = CAST_array(c);
- int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
- int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
- int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
+ int32_t nvals_greater =
+ count_greater(array->array, array->cardinality, (uint16_t)max);
+ int32_t nvals_less =
+ count_less(array->array, array->cardinality - nvals_greater,
+ (uint16_t)min);
+ int32_t union_cardinality =
+ nvals_less + (max - min + 1) + nvals_greater;
if (union_cardinality == INT32_C(0x10000)) {
*result_type = RUN_CONTAINER_TYPE;
return run_container_create_range(0, INT32_C(0x10000));
} else if (union_cardinality <= DEFAULT_MAX_SIZE) {
*result_type = ARRAY_CONTAINER_TYPE;
- array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
+ array_container_add_range_nvals(array, min, max, nvals_less,
+ nvals_greater);
return array;
} else {
*result_type = BITSET_CONTAINER_TYPE;
@@ -6674,14 +6282,19 @@ static inline container_t *container_add_range(
case RUN_CONTAINER_TYPE: {
run_container_t *run = CAST_run(c);
- int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
- int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
+ int32_t nruns_greater =
+ rle16_count_greater(run->runs, run->n_runs, (uint16_t)max);
+ int32_t nruns_less = rle16_count_less(
+ run->runs, run->n_runs - nruns_greater, (uint16_t)min);
- int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t);
- int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ int32_t run_size_bytes =
+ (nruns_less + 1 + nruns_greater) * sizeof(rle16_t);
+ int32_t bitset_size_bytes =
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
if (run_size_bytes <= bitset_size_bytes) {
- run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+ run_container_add_range_nruns(run, min, max, nruns_less,
+ nruns_greater);
*result_type = RUN_CONTAINER_TYPE;
return run;
} else {
@@ -6689,7 +6302,7 @@ static inline container_t *container_add_range(
}
}
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
}
@@ -6701,30 +6314,30 @@ static inline container_t *container_add_range(
* - pointer to a newly-allocated container (if it is more efficient)
*
* If the returned pointer is different from $container, then a new container
- * has been created and the caller is responsible for freeing the original container.
+ * has been created and the caller is responsible for freeing the original
+ * container.
*/
-static inline container_t *container_remove_range(
- container_t *c, uint8_t type,
- uint32_t min, uint32_t max,
- uint8_t *result_type
-){
- switch (type) {
+static inline container_t *container_remove_range(container_t *c, uint8_t type,
+ uint32_t min, uint32_t max,
+ uint8_t *result_type) {
+ switch (type) {
case BITSET_CONTAINER_TYPE: {
bitset_container_t *bitset = CAST_bitset(c);
- int32_t result_cardinality = bitset->cardinality -
- bitset_lenrange_cardinality(bitset->words, min, max-min);
+ int32_t result_cardinality =
+ bitset->cardinality -
+ bitset_lenrange_cardinality(bitset->words, min, max - min);
if (result_cardinality == 0) {
return NULL;
} else if (result_cardinality <= DEFAULT_MAX_SIZE) {
*result_type = ARRAY_CONTAINER_TYPE;
- bitset_reset_range(bitset->words, min, max+1);
+ bitset_reset_range(bitset->words, min, max + 1);
bitset->cardinality = result_cardinality;
return array_container_from_bitset(bitset);
} else {
*result_type = BITSET_CONTAINER_TYPE;
- bitset_reset_range(bitset->words, min, max+1);
+ bitset_reset_range(bitset->words, min, max + 1);
bitset->cardinality = result_cardinality;
return bitset;
}
@@ -6732,16 +6345,19 @@ static inline container_t *container_remove_range(
case ARRAY_CONTAINER_TYPE: {
array_container_t *array = CAST_array(c);
- int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
- int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
+ int32_t nvals_greater =
+ count_greater(array->array, array->cardinality, (uint16_t)max);
+ int32_t nvals_less =
+ count_less(array->array, array->cardinality - nvals_greater,
+ (uint16_t)min);
int32_t result_cardinality = nvals_less + nvals_greater;
if (result_cardinality == 0) {
return NULL;
} else {
*result_type = ARRAY_CONTAINER_TYPE;
- array_container_remove_range(array, nvals_less,
- array->cardinality - result_cardinality);
+ array_container_remove_range(
+ array, nvals_less, array->cardinality - result_cardinality);
return array;
}
}
@@ -6751,7 +6367,8 @@ static inline container_t *container_remove_range(
if (run->n_runs == 0) {
return NULL;
}
- if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) {
+ if (min <= run_container_minimum(run) &&
+ max >= run_container_maximum(run)) {
return NULL;
}
@@ -6759,12 +6376,79 @@ static inline container_t *container_remove_range(
return convert_run_to_efficient_container(run, result_type);
}
default:
- __builtin_unreachable();
- }
+ roaring_unreachable;
+ }
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+using api::roaring_container_iterator_t;
+#endif
+
+/**
+ * Initializes the iterator at the first entry in the container.
+ */
+roaring_container_iterator_t container_init_iterator(const container_t *c,
+ uint8_t typecode,
+ uint16_t *value);
+
+/**
+ * Initializes the iterator at the last entry in the container.
+ */
+roaring_container_iterator_t container_init_iterator_last(const container_t *c,
+ uint8_t typecode,
+ uint16_t *value);
+
+/**
+ * Moves the iterator to the next entry. Returns true and sets `value` if a
+ * value is present.
+ */
+bool container_iterator_next(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it, uint16_t *value);
+
+/**
+ * Moves the iterator to the previous entry. Returns true and sets `value` if a
+ * value is present.
+ */
+bool container_iterator_prev(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it, uint16_t *value);
+
+/**
+ * Moves the iterator to the smallest entry that is greater than or equal to
+ * `val`. Returns true and sets `value_out` if a value is present. `value_out`
+ * should be initialized to a value.
+ */
+bool container_iterator_lower_bound(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint16_t *value_out, uint16_t val);
+
+/**
+ * Reads up to `count` entries from the container, and writes them into `buf`
+ * as `high16 | entry`. Returns true and sets `value_out` if a value is present
+ * after reading the entries. Sets `consumed` to the number of values read.
+ * `count` should be greater than zero.
+ */
+bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint32_t high16, uint32_t *buf,
+ uint32_t count, uint32_t *consumed,
+ uint16_t *value_out);
+
+/**
+ * Reads up to `count` entries from the container, and writes them into `buf`
+ * as `high48 | entry`. Returns true and sets `value_out` if a value is present
+ * after reading the entries. Sets `consumed` to the number of values read.
+ * `count` should be greater than zero.
+ */
+bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint64_t high48, uint64_t *buf,
+ uint32_t count, uint32_t *consumed,
+ uint16_t *value_out);
+
+#ifdef __cplusplus
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
#endif
@@ -6779,7 +6463,8 @@ static inline container_t *container_remove_range(
#ifdef __cplusplus
-extern "C" { namespace roaring {
+extern "C" {
+namespace roaring {
// Note: in pure C++ code, you should avoid putting `using` in header files
using api::roaring_array_t;
@@ -6853,9 +6538,8 @@ inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
/**
* Retrieves the container at index i, filling in the typecode
*/
-inline container_t *ra_get_container_at_index(
- const roaring_array_t *ra, uint16_t i, uint8_t *typecode
-){
+inline container_t *ra_get_container_at_index(const roaring_array_t *ra,
+ uint16_t i, uint8_t *typecode) {
*typecode = ra->typecodes[i];
return ra->containers[i];
}
@@ -6870,16 +6554,14 @@ inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
/**
* Add a new key-value pair at index i
*/
-void ra_insert_new_key_value_at(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode);
+void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode);
/**
* Append a new key-value pair
*/
-void ra_append(
- roaring_array_t *ra, uint16_t key,
- container_t *c, uint8_t typecode);
+void ra_append(roaring_array_t *ra, uint16_t key, container_t *c,
+ uint8_t typecode);
/**
* Append a new key-value pair to ra, cloning (in COW sense) a value from sa
@@ -6929,15 +6611,16 @@ void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
* Set the container at the corresponding index using the specified
* typecode.
*/
-inline void ra_set_container_at_index(
- const roaring_array_t *ra, int32_t i,
- container_t *c, uint8_t typecode
-){
+inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i,
+ container_t *c, uint8_t typecode) {
assert(i < ra->size);
ra->containers[i] = c;
ra->typecodes[i] = typecode;
}
+container_t *ra_get_container(roaring_array_t *ra, uint16_t x,
+ uint8_t *typecode);
+
/**
* If needed, increase the capacity of the array so that it can fit k values
* (at
@@ -6956,10 +6639,10 @@ int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);
void ra_downsize(roaring_array_t *ra, int32_t new_length);
-inline void ra_replace_key_and_container_at_index(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode
-){
+inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
+ int32_t i, uint16_t key,
+ container_t *c,
+ uint8_t typecode) {
assert(i < ra->size);
ra->keys[i] = key;
@@ -6970,7 +6653,8 @@ inline void ra_replace_key_and_container_at_index(
// write set bits to an array
void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
-bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans);
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset,
+ size_t limit, uint32_t *ans);
/**
* write a bitmap to a buffer. This is meant to be compatible with
@@ -6985,10 +6669,11 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
* with the Java and Go versions.
* maxbytes indicates how many bytes available from buf.
* When the function returns true, roaring_array_t is populated with the data
- * and *readbytes indicates how many bytes were read. In all cases, if the function
- * returns true, then maxbytes >= *readbytes.
+ * and *readbytes indicates how many bytes were read. In all cases, if the
+ * function returns true, then maxbytes >= *readbytes.
*/
-bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes);
+bool ra_portable_deserialize(roaring_array_t *ra, const char *buf,
+ const size_t maxbytes, size_t *readbytes);
/**
* Quickly checks whether there is a serialized bitmap at the pointer,
@@ -7025,8 +6710,8 @@ uint32_t ra_portable_header_size(const roaring_array_t *ra);
static inline void ra_unshare_container_at_index(roaring_array_t *ra,
uint16_t i) {
assert(i < ra->size);
- ra->containers[i] = get_writable_copy_if_shared(ra->containers[i],
- &ra->typecodes[i]);
+ ra->containers[i] =
+ get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]);
}
/**
@@ -7034,10 +6719,9 @@ static inline void ra_unshare_container_at_index(roaring_array_t *ra,
*/
void ra_remove_at_index(roaring_array_t *ra, int32_t i);
-
/**
-* clears all containers, sets the size at 0 and shrinks the memory usage.
-*/
+ * clears all containers, sets the size at 0 and shrinks the memory usage.
+ */
void ra_reset(roaring_array_t *ra);
/**
@@ -7068,11 +6752,208 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
#ifdef __cplusplus
} // namespace internal
-} } // extern "C" { namespace roaring {
+}
+} // extern "C" { namespace roaring {
#endif
#endif
/* end file include/roaring/roaring_array.h */
+/* begin file include/roaring/art/art.h */
+#ifndef ART_ART_H
+#define ART_ART_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * This file contains an implementation of an Adaptive Radix Tree as described
+ * in https://db.in.tum.de/~leis/papers/ART.pdf.
+ *
+ * The ART contains the keys in _byte lexicographical_ order.
+ *
+ * Other features:
+ * * Fixed 48 bit key length: all keys are assumed to be 48 bits in size.
+ * This allows us to put the key and key prefixes directly in nodes, reducing
+ * indirection at no additional memory overhead.
+ * * Key compression: the only inner nodes created are at points where key
+ * chunks _differ_. This means that if there are two entries with different
+ * high 48 bits, then there is only one inner node containing the common key
+ * prefix, and two leaves.
+ * * Intrusive leaves: the leaf struct is included in user values. This removes
+ * a layer of indirection.
+ */
+
+// Fixed length of keys in the ART. All keys are assumed to be of this length.
+#define ART_KEY_BYTES 6
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace internal {
+#endif
+
+typedef uint8_t art_key_chunk_t;
+typedef struct art_node_s art_node_t;
+
+/**
+ * Wrapper to allow an empty tree.
+ */
+typedef struct art_s {
+ art_node_t *root;
+} art_t;
+
+/**
+ * Values inserted into the tree have to be cast-able to art_val_t. This
+ * improves performance by reducing indirection.
+ *
+ * NOTE: Value pointers must be unique! This is because each value struct
+ * contains the key corresponding to the value.
+ */
+typedef struct art_val_s {
+ art_key_chunk_t key[ART_KEY_BYTES];
+} art_val_t;
+
+/**
+ * Compares two keys, returns their relative order:
+ * * Key 1 < key 2: returns a negative value
+ * * Key 1 == key 2: returns 0
+ * * Key 1 > key 2: returns a positive value
+ */
+int art_compare_keys(const art_key_chunk_t key1[],
+ const art_key_chunk_t key2[]);
+
+/**
+ * Inserts the given key and value.
+ */
+void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val);
+
+/**
+ * Returns the value erased, NULL if not found.
+ */
+art_val_t *art_erase(art_t *art, const art_key_chunk_t *key);
+
+/**
+ * Returns the value associated with the given key, NULL if not found.
+ */
+art_val_t *art_find(const art_t *art, const art_key_chunk_t *key);
+
+/**
+ * Returns true if the ART is empty.
+ */
+bool art_is_empty(const art_t *art);
+
+/**
+ * Frees the nodes of the ART except the values, which the user is expected to
+ * free.
+ */
+void art_free(art_t *art);
+
+/**
+ * Returns the size in bytes of the ART. Includes size of pointers to values,
+ * but not the values themselves.
+ */
+size_t art_size_in_bytes(const art_t *art);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Prints the ART using printf, useful for debugging.
+ */
+void art_printf(const art_t *art);
+#endif
+
+/**
+ * Callback for validating the value stored in a leaf.
+ *
+ * Should return true if the value is valid, false otherwise
+ * If false is returned, `*reason` should be set to a static string describing
+ * the reason for the failure.
+ */
+typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason);
+
+/**
+ * Validate the ART tree, ensuring it is internally consistent.
+ */
+bool art_internal_validate(const art_t *art, const char **reason,
+ art_validate_cb_t validate_cb);
+
+/**
+ * ART-internal iterator bookkeeping. Users should treat this as an opaque type.
+ */
+typedef struct art_iterator_frame_s {
+ art_node_t *node;
+ uint8_t index_in_node;
+} art_iterator_frame_t;
+
+/**
+ * Users should only access `key` and `value` in iterators. The iterator is
+ * valid when `value != NULL`.
+ */
+typedef struct art_iterator_s {
+ art_key_chunk_t key[ART_KEY_BYTES];
+ art_val_t *value;
+
+ uint8_t depth; // Key depth
+ uint8_t frame; // Node depth
+
+ // State for each node in the ART the iterator has travelled from the root.
+ // This is `ART_KEY_BYTES + 1` because it includes state for the leaf too.
+ art_iterator_frame_t frames[ART_KEY_BYTES + 1];
+} art_iterator_t;
+
+/**
+ * Creates an iterator initialized to the first or last entry in the ART,
+ * depending on `first`. The iterator is not valid if there are no entries in
+ * the ART.
+ */
+art_iterator_t art_init_iterator(const art_t *art, bool first);
+
+/**
+ * Returns an initialized iterator positioned at a key equal to or greater than
+ * the given key, if it exists.
+ */
+art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key);
+
+/**
+ * Returns an initialized iterator positioned at a key greater than the given
+ * key, if it exists.
+ */
+art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key);
+
+/**
+ * The following iterator movement functions return true if a new entry was
+ * encountered.
+ */
+bool art_iterator_move(art_iterator_t *iterator, bool forward);
+bool art_iterator_next(art_iterator_t *iterator);
+bool art_iterator_prev(art_iterator_t *iterator);
+
+/**
+ * Moves the iterator forward to a key equal to or greater than the given key.
+ */
+bool art_iterator_lower_bound(art_iterator_t *iterator,
+ const art_key_chunk_t *key);
+
+/**
+ * Inserts the value and positions the iterator at the key.
+ */
+void art_iterator_insert(art_t *art, art_iterator_t *iterator,
+ const art_key_chunk_t *key, art_val_t *val);
+
+/**
+ * Erases the value pointed at by the iterator. Moves the iterator to the next
+ * leaf. Returns the value erased or NULL if nothing was erased.
+ */
+art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator);
+
+#ifdef __cplusplus
+} // namespace internal
+} // namespace roaring
+} // extern "C"
+#endif
+
+#endif
+/* end file include/roaring/art/art.h */
/* begin file src/array_util.c */
#include <assert.h>
#include <stdbool.h>
@@ -7082,14 +6963,28 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
#include <string.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+using namespace ::roaring::internal;
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
uint16_t ikey);
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
// used by intersect_vector16
ALIGNED(0x1000)
static const uint8_t shuffle_mask16[] = {
@@ -7517,7 +7412,125 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
}
return (int32_t)count;
}
-CROARING_UNTARGET_REGION
+
+ALLOW_UNALIGNED
+int array_container_to_uint32_array_vector16(void *vout, const uint16_t *array,
+ size_t cardinality,
+ uint32_t base) {
+ int outpos = 0;
+ uint32_t *out = (uint32_t *)vout;
+ size_t i = 0;
+ for (; i + sizeof(__m128i) / sizeof(uint16_t) <= cardinality;
+ i += sizeof(__m128i) / sizeof(uint16_t)) {
+ __m128i vinput = _mm_loadu_si128((const __m128i *)(array + i));
+ __m256i voutput = _mm256_add_epi32(_mm256_cvtepu16_epi32(vinput),
+ _mm256_set1_epi32(base));
+ _mm256_storeu_si256((__m256i *)(out + outpos), voutput);
+ outpos += sizeof(__m256i) / sizeof(uint32_t);
+ }
+ for (; i < cardinality; ++i) {
+ const uint32_t val = base + array[i];
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ return outpos;
+}
+
+int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b) {
+ size_t count = 0;
+ size_t i_a = 0, i_b = 0;
+ const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
+ const size_t st_a = (s_a / vectorlength) * vectorlength;
+ const size_t st_b = (s_b / vectorlength) * vectorlength;
+ __m128i v_a, v_b;
+ if ((i_a < st_a) && (i_b < st_b)) {
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ __m128i tmp[2] = {_mm_setzero_si128()};
+ size_t tmp_count = 0;
+ while ((A[i_a] == 0) || (B[i_b] == 0)) {
+ const __m128i res_v = _mm_cmpestrm(
+ v_b, vectorlength, v_a, vectorlength,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+ const int r = _mm_extract_epi32(res_v, 0);
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);
+ _mm_storeu_si128((__m128i *)&((uint16_t *)tmp)[tmp_count], p);
+ tmp_count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {
+ _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
+ _mm_storeu_si128(tmp, _mm_setzero_si128());
+ count += tmp_count;
+ tmp_count = 0;
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ if ((i_a < st_a) && (i_b < st_b)) {
+ while (true) {
+ const __m128i res_v = _mm_cmpistrm(
+ v_b, v_a,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+ const int r = _mm_extract_epi32(res_v, 0);
+ __m128i sm16 =
+ _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);
+ _mm_storeu_si128((__m128i *)&((uint16_t *)tmp)[tmp_count], p);
+ tmp_count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {
+ _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
+ _mm_storeu_si128(tmp, _mm_setzero_si128());
+ count += tmp_count;
+ tmp_count = 0;
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ }
+ // tmp_count <= 8, so this does not affect efficiency so much
+ size_t i;
+ for (i = 0; i < tmp_count; i++) {
+ A[count] = ((uint16_t *)tmp)[i];
+ count++;
+ }
+ i_a += tmp_count; // We can at least jump past $tmp_count elements in A
+ }
+ // intersect the tail using scalar intersection
+ while (i_a < s_a && i_b < s_b) {
+ uint16_t a = A[i_a];
+ uint16_t b = B[i_b];
+ if (a < b) {
+ i_a++;
+ } else if (b < a) {
+ i_b++;
+ } else {
+ A[count] = a; //==b;
+ count++;
+ i_a++;
+ i_b++;
+ }
+ }
+ return (int32_t)count;
+}
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
@@ -7589,7 +7602,7 @@ int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
}
return (int32_t)count;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
/////////
@@ -7638,15 +7651,15 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
// spotted in B, these don't get written out.
__m128i runningmask_a_found_in_b = _mm_setzero_si128();
/****
- * start of the main vectorized loop
- *****/
+ * start of the main vectorized loop
+ *****/
while (true) {
// afoundinb will contain a mask indicate for each entry in A
// whether it is seen
// in B
- const __m128i a_found_in_b =
- _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
- _SIDD_BIT_MASK);
+ const __m128i a_found_in_b = _mm_cmpistrm(
+ v_b, v_a,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
runningmask_a_found_in_b =
_mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
// we always compare the last values of A and B
@@ -7660,7 +7673,7 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
/*** next few lines are probably expensive *****/
__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
- bitmask_belongs_to_difference);
+ bitmask_belongs_to_difference);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
count += _mm_popcnt_u32(bitmask_belongs_to_difference);
@@ -7687,15 +7700,15 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
memset(buffer, 0, 8 * sizeof(uint16_t));
memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));
v_b = _mm_lddqu_si128((__m128i *)buffer);
- const __m128i a_found_in_b =
- _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
- _SIDD_BIT_MASK);
+ const __m128i a_found_in_b = _mm_cmpistrm(
+ v_b, v_a,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
runningmask_a_found_in_b =
_mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
const int bitmask_belongs_to_difference =
_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
- bitmask_belongs_to_difference);
+ bitmask_belongs_to_difference);
__m128i p = _mm_shuffle_epi8(v_a, sm16);
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow
count += _mm_popcnt_u32(bitmask_belongs_to_difference);
@@ -7719,79 +7732,77 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
}
}
if (i_a < s_a) {
- if(C == A) {
- assert((size_t)count <= i_a);
- if((size_t)count < i_a) {
- memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
- }
+ if (C == A) {
+ assert((size_t)count <= i_a);
+ if ((size_t)count < i_a) {
+ memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
+ }
} else {
- for(size_t i = 0; i < (s_a - i_a); i++) {
+ size_t i;
+
+ for (i = 0; i < (s_a - i_a); i++) {
C[count + i] = A[i + i_a];
- }
+ }
}
count += (int32_t)(s_a - i_a);
}
return count;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#endif // CROARING_IS_X64
-
-
/**
-* Branchless binary search going after 4 values at once.
-* Assumes that array is sorted.
-* You have that array[*index1] >= target1, array[*index12] >= target2, ...
-* except when *index1 = n, in which case you know that all values in array are
-* smaller than target1, and so forth.
-* It has logarithmic complexity.
-*/
+ * Branchless binary search going after 4 values at once.
+ * Assumes that array is sorted.
+ * You have that array[*index1] >= target1, array[*index2] >= target2, ...
+ * except when *index1 = n, in which case you know that all values in array are
+ * smaller than target1, and so forth.
+ * It has logarithmic complexity.
+ */
static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
- uint16_t target2, uint16_t target3, uint16_t target4,
- int32_t *index1, int32_t *index2, int32_t *index3,
- int32_t *index4) {
- const uint16_t *base1 = array;
- const uint16_t *base2 = array;
- const uint16_t *base3 = array;
- const uint16_t *base4 = array;
- if (n == 0)
- return;
- while (n > 1) {
- int32_t half = n >> 1;
- base1 = (base1[half] < target1) ? &base1[half] : base1;
- base2 = (base2[half] < target2) ? &base2[half] : base2;
- base3 = (base3[half] < target3) ? &base3[half] : base3;
- base4 = (base4[half] < target4) ? &base4[half] : base4;
- n -= half;
- }
- *index1 = (int32_t)((*base1 < target1) + base1 - array);
- *index2 = (int32_t)((*base2 < target2) + base2 - array);
- *index3 = (int32_t)((*base3 < target3) + base3 - array);
- *index4 = (int32_t)((*base4 < target4) + base4 - array);
+ uint16_t target2, uint16_t target3, uint16_t target4,
+ int32_t *index1, int32_t *index2, int32_t *index3,
+ int32_t *index4) {
+ const uint16_t *base1 = array;
+ const uint16_t *base2 = array;
+ const uint16_t *base3 = array;
+ const uint16_t *base4 = array;
+ if (n == 0) return;
+ while (n > 1) {
+ int32_t half = n >> 1;
+ base1 = (base1[half] < target1) ? &base1[half] : base1;
+ base2 = (base2[half] < target2) ? &base2[half] : base2;
+ base3 = (base3[half] < target3) ? &base3[half] : base3;
+ base4 = (base4[half] < target4) ? &base4[half] : base4;
+ n -= half;
+ }
+ *index1 = (int32_t)((*base1 < target1) + base1 - array);
+ *index2 = (int32_t)((*base2 < target2) + base2 - array);
+ *index3 = (int32_t)((*base3 < target3) + base3 - array);
+ *index4 = (int32_t)((*base4 < target4) + base4 - array);
}
/**
-* Branchless binary search going after 2 values at once.
-* Assumes that array is sorted.
-* You have that array[*index1] >= target1, array[*index12] >= target2.
-* except when *index1 = n, in which case you know that all values in array are
-* smaller than target1, and so forth.
-* It has logarithmic complexity.
-*/
+ * Branchless binary search going after 2 values at once.
+ * Assumes that array is sorted.
+ * You have that array[*index1] >= target1, array[*index2] >= target2,
+ * except when *index1 = n, in which case you know that all values in array are
+ * smaller than target1, and so forth.
+ * It has logarithmic complexity.
+ */
static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
- uint16_t target2, int32_t *index1, int32_t *index2) {
- const uint16_t *base1 = array;
- const uint16_t *base2 = array;
- if (n == 0)
- return;
- while (n > 1) {
- int32_t half = n >> 1;
- base1 = (base1[half] < target1) ? &base1[half] : base1;
- base2 = (base2[half] < target2) ? &base2[half] : base2;
- n -= half;
- }
- *index1 = (int32_t)((*base1 < target1) + base1 - array);
- *index2 = (int32_t)((*base2 < target2) + base2 - array);
+ uint16_t target2, int32_t *index1, int32_t *index2) {
+ const uint16_t *base1 = array;
+ const uint16_t *base2 = array;
+ if (n == 0) return;
+ while (n > 1) {
+ int32_t half = n >> 1;
+ base1 = (base1[half] < target1) ? &base1[half] : base1;
+ base2 = (base2[half] < target2) ? &base2[half] : base2;
+ n -= half;
+ }
+ *index1 = (int32_t)((*base1 < target1) + base1 - array);
+ *index2 = (int32_t)((*base2 < target2) + base2 - array);
}
/* Computes the intersection between one small and one large set of uint16_t.
@@ -7801,61 +7812,60 @@ static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
* galloping search in some instances.
*/
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
- const uint16_t *large, size_t size_l,
- uint16_t *buffer) {
- size_t pos = 0, idx_l = 0, idx_s = 0;
+ const uint16_t *large, size_t size_l,
+ uint16_t *buffer) {
+ size_t pos = 0, idx_l = 0, idx_s = 0;
- if (0 == size_s) {
- return 0;
- }
- int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
- while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
- uint16_t target1 = small[idx_s];
- uint16_t target2 = small[idx_s + 1];
- uint16_t target3 = small[idx_s + 2];
- uint16_t target4 = small[idx_s + 3];
- binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3,
- target4, &index1, &index2, &index3, &index4);
- if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
- buffer[pos++] = target1;
- }
- if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
- buffer[pos++] = target2;
- }
- if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
- buffer[pos++] = target3;
- }
- if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
- buffer[pos++] = target4;
- }
- idx_s += 4;
- idx_l += index4;
- }
- if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
- uint16_t target1 = small[idx_s];
- uint16_t target2 = small[idx_s + 1];
- binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1,
- &index2);
- if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
- buffer[pos++] = target1;
- }
- if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
- buffer[pos++] = target2;
- }
- idx_s += 2;
- idx_l += index2;
- }
- if ((idx_s < size_s) && (idx_l < size_l)) {
- uint16_t val_s = small[idx_s];
- int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
- if (index >= 0)
- buffer[pos++] = val_s;
- }
- return (int32_t)pos;
+ if (0 == size_s) {
+ return 0;
+ }
+ int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
+ while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
+ uint16_t target1 = small[idx_s];
+ uint16_t target2 = small[idx_s + 1];
+ uint16_t target3 = small[idx_s + 2];
+ uint16_t target4 = small[idx_s + 3];
+ binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1,
+ target2, target3, target4, &index1, &index2, &index3,
+ &index4);
+ if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
+ buffer[pos++] = target1;
+ }
+ if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
+ buffer[pos++] = target2;
+ }
+ if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
+ buffer[pos++] = target3;
+ }
+ if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
+ buffer[pos++] = target4;
+ }
+ idx_s += 4;
+ idx_l += index4;
+ }
+ if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
+ uint16_t target1 = small[idx_s];
+ uint16_t target2 = small[idx_s + 1];
+ binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1,
+ target2, &index1, &index2);
+ if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
+ buffer[pos++] = target1;
+ }
+ if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
+ buffer[pos++] = target2;
+ }
+ idx_s += 2;
+ idx_l += index2;
+ }
+ if ((idx_s < size_s) && (idx_l < size_l)) {
+ uint16_t val_s = small[idx_s];
+ int32_t index =
+ binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
+ if (index >= 0) buffer[pos++] = val_s;
+ }
+ return (int32_t)pos;
}
-
-
// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
size_t size_s,
@@ -7893,7 +7903,7 @@ int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
}
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
- const uint16_t *large, size_t size_l) {
+ const uint16_t *large, size_t size_l) {
size_t idx_l = 0, idx_s = 0;
if (0 == size_s) {
@@ -7944,7 +7954,7 @@ int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
goto SKIP_FIRST_COMPARE;
}
}
- return (int32_t)(out - initout); // NOTREACHED
+ // return (int32_t)(out - initout); // NOTREACHED
}
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
@@ -7969,12 +7979,11 @@ int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
goto SKIP_FIRST_COMPARE;
}
}
- return answer; // NOTREACHED
+ // return answer; // NOTREACHED
}
-
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
- const uint16_t *B, const size_t lenB) {
+ const uint16_t *B, const size_t lenB) {
if (lenA == 0 || lenB == 0) return 0;
const uint16_t *endA = A + lenA;
const uint16_t *endB = B + lenB;
@@ -7996,8 +8005,6 @@ bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
return false; // NOTREACHED
}
-
-
/**
* Generic intersection function.
*/
@@ -8024,7 +8031,7 @@ size_t intersection_uint32(const uint32_t *A, const size_t lenA,
goto SKIP_FIRST_COMPARE;
}
}
- return (out - initout); // NOTREACHED
+ // return (out - initout); // NOTREACHED
}
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
@@ -8049,7 +8056,7 @@ size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
goto SKIP_FIRST_COMPARE;
}
}
- return card; // NOTREACHED
+ // return card; // NOTREACHED
}
// can one vectorize the computation of the union? (Update: Yes! See
@@ -8180,7 +8187,7 @@ int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
return pos_out;
}
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
/***
* start of the SIMD 16-bit union code
@@ -8222,7 +8229,7 @@ static inline void sse_merge(const __m128i *vInput1,
*vecMax = _mm_max_epu16(vecTmp, *vecMax);
*vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2);
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
// used by store_unique, generated by simdunion.py
static uint8_t uniqshuf[] = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
@@ -8581,13 +8588,13 @@ static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
_mm_storeu_si128((__m128i *)output, val);
return numberofnewvalues;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
// working in-place, this function overwrites the repeated values
// could be avoided?
static inline uint32_t unique(uint16_t *out, uint32_t len) {
- uint32_t pos = 1;
- for (uint32_t i = 1; i < len; ++i) {
+ uint32_t pos = 1, i;
+ for (i = 1; i < len; ++i) {
if (out[i] != out[i - 1]) {
out[pos++] = out[i];
}
@@ -8682,7 +8689,7 @@ uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
}
return len;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
/**
* End of the SIMD 16-bit union code
@@ -8711,13 +8718,13 @@ static inline int store_unique_xor(__m128i old, __m128i newval,
_mm_storeu_si128((__m128i *)output, val);
return numberofnewvalues;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
// working in-place, this function overwrites the repeated values
// could be avoided? Warning: assumes len > 0
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
- uint32_t pos = 1;
- for (uint32_t i = 1; i < len; ++i) {
+ uint32_t pos = 1, i;
+ for (i = 1; i < len; ++i) {
if (out[i] != out[i - 1]) {
out[pos++] = out[i];
} else
@@ -8795,8 +8802,8 @@ uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
// last value of vecMax,
// we store to "buffer"
int leftoversize = store_unique_xor(laststore, vecMax, buffer);
- uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
- uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
+ uint16_t vec7 = (uint16_t)_mm_extract_epi16(vecMax, 7);
+ uint16_t vec6 = (uint16_t)_mm_extract_epi16(vecMax, 6);
if (vec7 != vec6) buffer[leftoversize++] = vec7;
if (pos1 == len1) {
memcpy(buffer + leftoversize, array1 + 8 * pos1,
@@ -8829,7 +8836,7 @@ uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
}
return len;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
/**
* End of SIMD 16-bit XOR code
*/
@@ -8929,53 +8936,107 @@ size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
return pos;
}
-
-
-size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
- size_t size_2, uint16_t *buffer) {
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1,
+ const uint16_t *set_2, size_t size_2,
+ uint16_t *buffer) {
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
// compute union with smallest array first
- if (size_1 < size_2) {
- return union_vector16(set_1, (uint32_t)size_1,
- set_2, (uint32_t)size_2, buffer);
- } else {
- return union_vector16(set_2, (uint32_t)size_2,
- set_1, (uint32_t)size_1, buffer);
- }
+ if (size_1 < size_2) {
+ return union_vector16(set_1, (uint32_t)size_1, set_2,
+ (uint32_t)size_2, buffer);
+ } else {
+ return union_vector16(set_2, (uint32_t)size_2, set_1,
+ (uint32_t)size_1, buffer);
+ }
} else {
- // compute union with smallest array first
- if (size_1 < size_2) {
- return union_uint16(
- set_1, size_1, set_2, size_2, buffer);
- } else {
- return union_uint16(
- set_2, size_2, set_1, size_1, buffer);
- }
+ // compute union with smallest array first
+ if (size_1 < size_2) {
+ return union_uint16(set_1, size_1, set_2, size_2, buffer);
+ } else {
+ return union_uint16(set_2, size_2, set_1, size_1, buffer);
+ }
}
#else
// compute union with smallest array first
if (size_1 < size_2) {
- return union_uint16(
- set_1, size_1, set_2, size_2, buffer);
+ return union_uint16(set_1, size_1, set_2, size_2, buffer);
} else {
- return union_uint16(
- set_2, size_2, set_1, size_1, buffer);
+ return union_uint16(set_2, size_2, set_1, size_1, buffer);
}
#endif
}
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) {
+ const uint8_t *ptr1 = (const uint8_t *)s1;
+ const uint8_t *ptr2 = (const uint8_t *)s2;
+ const uint8_t *end1 = ptr1 + n;
+ const uint8_t *end8 = ptr1 + ((n >> 3) << 3);
+ const uint8_t *end32 = ptr1 + ((n >> 5) << 5);
+ const uint8_t *end64 = ptr1 + ((n >> 6) << 6);
+
+ while (ptr1 < end64) {
+ __m512i r1 = _mm512_loadu_si512((const __m512i *)ptr1);
+ __m512i r2 = _mm512_loadu_si512((const __m512i *)ptr2);
+
+ uint64_t mask = _mm512_cmpeq_epi8_mask(r1, r2);
+
+ if (mask != UINT64_MAX) {
+ return false;
+ }
+
+ ptr1 += 64;
+ ptr2 += 64;
+ }
+
+ while (ptr1 < end32) {
+ __m256i r1 = _mm256_loadu_si256((const __m256i *)ptr1);
+ __m256i r2 = _mm256_loadu_si256((const __m256i *)ptr2);
+ int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
+ if ((uint32_t)mask != UINT32_MAX) {
+ return false;
+ }
+ ptr1 += 32;
+ ptr2 += 32;
+ }
+
+ while (ptr1 < end8) {
+ uint64_t v1, v2;
+ memcpy(&v1, ptr1, sizeof(uint64_t));
+ memcpy(&v2, ptr2, sizeof(uint64_t));
+ if (v1 != v2) {
+ return false;
+ }
+ ptr1 += 8;
+ ptr2 += 8;
+ }
+
+ while (ptr1 < end1) {
+ if (*ptr1 != *ptr2) {
+ return false;
+ }
+ ptr1++;
+ ptr2++;
+ }
+
+ return true;
+}
+CROARING_UNTARGET_AVX512
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+
CROARING_TARGET_AVX2
static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
const uint8_t *ptr1 = (const uint8_t *)s1;
const uint8_t *ptr2 = (const uint8_t *)s2;
const uint8_t *end1 = ptr1 + n;
- const uint8_t *end8 = ptr1 + n/8*8;
- const uint8_t *end32 = ptr1 + n/32*32;
+ const uint8_t *end8 = ptr1 + n / 8 * 8;
+ const uint8_t *end32 = ptr1 + n / 32 * 32;
while (ptr1 < end32) {
- __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1);
- __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2);
+ __m256i r1 = _mm256_loadu_si256((const __m256i *)ptr1);
+ __m256i r2 = _mm256_loadu_si256((const __m256i *)ptr2);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
if ((uint32_t)mask != UINT32_MAX) {
return false;
@@ -8986,8 +9047,8 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
while (ptr1 < end8) {
uint64_t v1, v2;
- memcpy(&v1,ptr1,sizeof(uint64_t));
- memcpy(&v2,ptr2,sizeof(uint64_t));
+ memcpy(&v1, ptr1, sizeof(uint64_t));
+ memcpy(&v2, ptr2, sizeof(uint64_t));
if (v1 != v2) {
return false;
}
@@ -9005,28 +9066,2461 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
return true;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#endif
bool memequals(const void *s1, const void *s2, size_t n) {
if (n == 0) {
return true;
}
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
- return _avx2_memequals(s1, s2, n);
- } else {
- return memcmp(s1, s2, n) == 0;
- }
+#if CROARING_IS_X64
+ int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if (support & ROARING_SUPPORTS_AVX512) {
+ return _avx512_memequals(s1, s2, n);
+ } else
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+ if (support & ROARING_SUPPORTS_AVX2) {
+ return _avx2_memequals(s1, s2, n);
+ } else {
+ return memcmp(s1, s2, n) == 0;
+ }
#else
return memcmp(s1, s2, n) == 0;
#endif
}
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+int avx512_array_container_to_uint32_array(void *vout, const uint16_t *array,
+ size_t cardinality, uint32_t base) {
+ int outpos = 0;
+ uint32_t *out = (uint32_t *)vout;
+ size_t i = 0;
+ for (; i + sizeof(__m256i) / sizeof(uint16_t) <= cardinality;
+ i += sizeof(__m256i) / sizeof(uint16_t)) {
+ __m256i vinput = _mm256_loadu_si256((const __m256i *)(array + i));
+ __m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput),
+ _mm512_set1_epi32(base));
+ _mm512_storeu_si512((__m512i *)(out + outpos), voutput);
+ outpos += sizeof(__m512i) / sizeof(uint32_t);
+ }
+ for (; i < cardinality; ++i) {
+ const uint32_t val = base + array[i];
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ return outpos;
+}
+CROARING_UNTARGET_AVX512
+#endif // #if CROARING_COMPILER_SUPPORTS_AVX512
+#endif // #if CROARING_IS_X64
+
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif/* end file src/array_util.c */
+/* begin file src/art/art.c */
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#define ART_NODE4_TYPE 0
+#define ART_NODE16_TYPE 1
+#define ART_NODE48_TYPE 2
+#define ART_NODE256_TYPE 3
+#define ART_NUM_TYPES 4
+
+// Node48 placeholder value to indicate no child is present at this key index.
+#define ART_NODE48_EMPTY_VAL 48
+
+// We use the least significant bit of node pointers to indicate whether a node
+// is a leaf or an inner node. This is never surfaced to the user.
+//
+// Using pointer tagging to indicate leaves not only saves a bit of memory by
+// sparing the typecode, but also allows us to use an intrusive leaf struct.
+// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon
+// deallocation of the ART, we know not to free the leaves without having to
+// dereference the leaf pointers.
+//
+// All internal operations on leaves should use CAST_LEAF before using the leaf.
+// The only places that use SET_LEAF are locations where a field is directly
+// assigned to a leaf pointer. After using SET_LEAF, the leaf should be treated
+// as a node of unknown type.
+#define IS_LEAF(p) (((uintptr_t)(p) & 1))
+#define SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1))
+#define CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1)))
+
+#define NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1)
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace internal {
+#endif
+
+typedef uint8_t art_typecode_t;
+
+// Aliasing with a "leaf" naming so that its purpose is clearer in the context
+// of the trie internals.
+typedef art_val_t art_leaf_t;
+
+typedef struct art_internal_validate_s {
+ const char **reason;  // out-pointer; art_validate_fail() stores the failure message through it
+ art_validate_cb_t validate_cb;  // NOTE(review): not used by the per-node validators visible here; presumably applied to leaves - confirm
+
+ int depth;  // incremented by each inner-node validator before it descends into children
+ art_key_chunk_t current_key[ART_KEY_BYTES];  // current_key[depth - 1] is set to the key chunk of the child being visited
+} art_internal_validate_t;
+
+// Stores `msg` through the validator's `reason` out-pointer and yields false,
+// so a failed check can be written as `return art_validate_fail(...)`.
+static inline bool art_validate_fail(const art_internal_validate_t *validate,
+                                     const char *msg) {
+    const char **reason_slot = validate->reason;
+    *reason_slot = msg;
+    return false;
+}
+
+// Inner node, with prefix.
+//
+// We use a fixed-length array as a pointer would be larger than the array.
+typedef struct art_inner_node_s {
+ art_typecode_t typecode;  // one of the ART_NODE*_TYPE constants; set by art_init_inner_node()
+ uint8_t prefix_size;  // number of bytes of `prefix` in use
+ uint8_t prefix[ART_KEY_BYTES - 1];  // key bytes shared by every key below this node
+} art_inner_node_t;
+
+// Inner node types.
+
+// Node4: key[i] corresponds with children[i]. Keys are sorted.
+typedef struct art_node4_s {
+ art_inner_node_t base;  // common header; typecode is ART_NODE4_TYPE
+ uint8_t count;  // number of used entries at the front of keys[] / children[]
+ uint8_t keys[4];
+ art_node_t *children[4];
+} art_node4_t;
+
+// Node16: key[i] corresponds with children[i]. Keys are sorted.
+typedef struct art_node16_s {
+ art_inner_node_t base;  // common header; typecode is ART_NODE16_TYPE
+ uint8_t count;  // number of used entries at the front of keys[] / children[]
+ uint8_t keys[16];
+ art_node_t *children[16];
+} art_node16_t;
+
+// Node48: key[i] corresponds with children[key[i]] if key[i] !=
+// ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct indexing.
+typedef struct art_node48_s {
+ art_inner_node_t base;  // common header; typecode is ART_NODE48_TYPE
+ uint8_t count;  // number of children currently stored
+ // Bitset where the ith bit is set if children[i] is available
+ // Because there are at most 48 children, only the bottom 48 bits are used.
+ uint64_t available_children;
+ uint8_t keys[256];  // indexed by key chunk: slot in children[], or ART_NODE48_EMPTY_VAL
+ art_node_t *children[48];  // valid slots are 0..47; a freed slot keeps its stale pointer
+} art_node48_t;
+
+// Node256: children[i] is directly indexed by key chunk. A child is present if
+// children[i] != NULL.
+typedef struct art_node256_s {
+ art_inner_node_t base;  // common header; typecode is ART_NODE256_TYPE
+ uint16_t count;  // number of non-NULL children; 16 bits because it can reach 256
+ art_node_t *children[256];  // indexed directly by key chunk; NULL means absent
+} art_node256_t;
+
+// Helper struct to refer to a child within a node at a specific index.
+typedef struct art_indexed_child_s {
+ art_node_t *child;  // NULL means "no child"; the other fields are then left unset
+ uint8_t index;  // array position for Node4/Node16, the key chunk for Node48/Node256
+ art_key_chunk_t key_chunk;  // key chunk under which `child` is stored
+} art_indexed_child_t;
+
+// True when the pointer carries the leaf tag in its least significant bit.
+static inline bool art_is_leaf(const art_node_t *node) {
+    return IS_LEAF(node);
+}
+
+// Copies the ART_KEY_BYTES bytes of `key` into the leaf's key field.
+static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) {
+    void *dest = leaf->key;
+    memcpy(dest, key, ART_KEY_BYTES);
+}
+
+// Reads the typecode stored in an inner node's header.
+static inline uint8_t art_get_type(const art_inner_node_t *node) {
+    const art_typecode_t code = node->typecode;
+    return code;
+}
+
+// Fills in an inner node's header: typecode, prefix length and the first
+// `prefix_size` chunks of `prefix`.
+static inline void art_init_inner_node(art_inner_node_t *node,
+                                       art_typecode_t typecode,
+                                       const art_key_chunk_t prefix[],
+                                       uint8_t prefix_size) {
+    const size_t prefix_bytes = prefix_size * sizeof(art_key_chunk_t);
+    node->prefix_size = prefix_size;
+    node->typecode = typecode;
+    memcpy(node->prefix, prefix, prefix_bytes);
+}
+
+static void art_free_node(art_node_t *node);
+
+// ===================== Start of node-specific functions ======================
+
+static art_node4_t *art_node4_create(const art_key_chunk_t prefix[],
+ uint8_t prefix_size);
+static art_node16_t *art_node16_create(const art_key_chunk_t prefix[],
+ uint8_t prefix_size);
+static art_node48_t *art_node48_create(const art_key_chunk_t prefix[],
+ uint8_t prefix_size);
+static art_node256_t *art_node256_create(const art_key_chunk_t prefix[],
+ uint8_t prefix_size);
+
+static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child,
+ uint8_t key);
+static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child,
+ uint8_t key);
+static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child,
+ uint8_t key);
+static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child,
+ uint8_t key);
+
+// Allocates an empty Node4 carrying the given prefix.
+static art_node4_t *art_node4_create(const art_key_chunk_t prefix[],
+                                     uint8_t prefix_size) {
+    art_node4_t *fresh = (art_node4_t *)roaring_malloc(sizeof(*fresh));
+    art_init_inner_node(&fresh->base, ART_NODE4_TYPE, prefix, prefix_size);
+    fresh->count = 0;
+    return fresh;
+}
+
+// Frees every child subtree of a Node4, then the node itself.
+static void art_free_node4(art_node4_t *node) {
+    art_node_t **child = node->children;
+    size_t remaining = node->count;
+    while (remaining-- > 0) {
+        art_free_node(*child++);
+    }
+    roaring_free(node);
+}
+
+// Linear search for `key` among the used entries; NULL when absent.
+static inline art_node_t *art_node4_find_child(const art_node4_t *node,
+                                               art_key_chunk_t key) {
+    const size_t used = node->count;
+    size_t pos = 0;
+    while (pos < used && node->keys[pos] != key) {
+        ++pos;
+    }
+    return (pos < used) ? node->children[pos] : NULL;
+}
+
+// Inserts `child` under `key`, keeping keys sorted. A full Node4 is replaced
+// by a Node16 holding the same entries plus the new one. Returns the node
+// that now holds the entries.
+static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child,
+                                    uint8_t key) {
+    if (node->count >= 4) {
+        // No room left: move all four entries into a Node16 and insert there.
+        art_node16_t *grown =
+            art_node16_create(node->base.prefix, node->base.prefix_size);
+        size_t src;
+        for (src = 0; src < 4; ++src) {
+            art_node16_insert(grown, node->children[src], node->keys[src]);
+        }
+        roaring_free(node);
+        return art_node16_insert(grown, child, key);
+    }
+
+    // First slot whose key is strictly greater than the new key.
+    size_t slot = 0;
+    while (slot < node->count && node->keys[slot] <= key) {
+        ++slot;
+    }
+
+    // Open a gap at `slot` by shifting the tail one position to the right.
+    const size_t tail = node->count - slot;
+    memmove(&node->keys[slot + 1], &node->keys[slot],
+            tail * sizeof(art_key_chunk_t));
+    memmove(&node->children[slot + 1], &node->children[slot],
+            tail * sizeof(art_node_t *));
+
+    node->keys[slot] = key;
+    node->children[slot] = child;
+    node->count++;
+    return (art_node_t *)node;
+}
+
+// Removes the entry stored under `key_chunk`, if any. When exactly two
+// children exist, the node is freed and the surviving child is returned with
+// this node's prefix and the child's key chunk prepended to its own prefix
+// (leaves are returned unchanged). Otherwise returns `node`.
+static inline art_node_t *art_node4_erase(art_node4_t *node,
+                                          art_key_chunk_t key_chunk) {
+    // Scan from the back so that the highest matching position is selected.
+    int idx = -1;
+    int scan;
+    for (scan = (int)node->count - 1; scan >= 0; --scan) {
+        if (node->keys[scan] == key_chunk) {
+            idx = scan;
+            break;
+        }
+    }
+    if (idx < 0) {
+        return (art_node_t *)node;
+    }
+
+    if (node->count != 2) {
+        // Close the gap left by the erased entry; order stays sorted.
+        const size_t tail = node->count - idx - 1;
+        memmove(&node->keys[idx], &node->keys[idx + 1],
+                tail * sizeof(art_key_chunk_t));
+        memmove(&node->children[idx], &node->children[idx + 1],
+                tail * sizeof(art_node_t *));
+        node->count--;
+        return (art_node_t *)node;
+    }
+
+    // Two children: idx is 0 or 1, so the sibling sits at idx ^ 1.
+    const uint8_t sibling = idx ^ 1;
+    art_node_t *survivor = node->children[sibling];
+    const art_key_chunk_t survivor_key = node->keys[sibling];
+    if (!art_is_leaf(survivor)) {
+        art_inner_node_t *inner = (art_inner_node_t *)survivor;
+        const uint8_t parent_len = node->base.prefix_size;
+        // New prefix = parent prefix + survivor's key chunk + old prefix.
+        // The old prefix is moved first so it is not overwritten.
+        memmove(inner->prefix + parent_len + 1, inner->prefix,
+                inner->prefix_size);
+        memcpy(inner->prefix, node->base.prefix, parent_len);
+        inner->prefix[parent_len] = survivor_key;
+        inner->prefix_size += parent_len + 1;
+    }
+    roaring_free(node);
+    return survivor;
+}
+
+// Points the entry stored under `key_chunk` at `new_child`; does nothing when
+// the key chunk is not present.
+static inline void art_node4_replace(art_node4_t *node,
+                                     art_key_chunk_t key_chunk,
+                                     art_node_t *new_child) {
+    size_t pos = 0;
+    while (pos < node->count) {
+        if (node->keys[pos] == key_chunk) {
+            node->children[pos] = new_child;
+            break;
+        }
+        ++pos;
+    }
+}
+
+// Returns the entry at position `index + 1`, or a result whose child is NULL
+// when that position is past the last used entry.
+static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node,
+                                                       int index) {
+    art_indexed_child_t result;
+    const int next = index + 1;
+    if (next < node->count) {
+        result.index = next;
+        result.child = node->children[next];
+        result.key_chunk = node->keys[next];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the entry just before position `index` (clamped to the number of
+// used entries), or a result whose child is NULL when there is none.
+static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node,
+                                                       int index) {
+    art_indexed_child_t result;
+    const int limit = (index > node->count) ? node->count : index;
+    const int prev = limit - 1;
+    if (prev >= 0) {
+        result.index = prev;
+        result.child = node->children[prev];
+        result.key_chunk = node->keys[prev];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the entry at position `index`; the child is NULL when `index` is
+// outside [0, count).
+static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node,
+                                                     int index) {
+    art_indexed_child_t result;
+    const bool in_range = (index >= 0) && (index < node->count);
+    if (in_range) {
+        result.index = index;
+        result.child = node->children[index];
+        result.key_chunk = node->keys[index];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the first entry whose key is >= `key_chunk`; the child is NULL when
+// every stored key is smaller.
+static inline art_indexed_child_t art_node4_lower_bound(
+    art_node4_t *node, art_key_chunk_t key_chunk) {
+    art_indexed_child_t result;
+    size_t pos = 0;
+    while (pos < node->count && node->keys[pos] < key_chunk) {
+        ++pos;
+    }
+    if (pos < node->count) {
+        result.index = pos;
+        result.child = node->children[pos];
+        result.key_chunk = node->keys[pos];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+static bool art_internal_validate_at(const art_node_t *node,
+ art_internal_validate_t validator);
+
+// Checks Node4 invariants (2..4 children, strictly increasing keys, no child
+// pointer stored twice) and recursively validates each child. `validator` is
+// a by-value copy, so depth/current_key changes stay local to this subtree.
+static bool art_node4_internal_validate(const art_node4_t *node,
+                                        art_internal_validate_t validator) {
+    const int used = node->count;
+    if (used == 0) {
+        return art_validate_fail(&validator, "Node4 has no children");
+    }
+    if (used > 4) {
+        return art_validate_fail(&validator, "Node4 has too many children");
+    }
+    if (used == 1) {
+        return art_validate_fail(
+            &validator, "Node4 and child node should have been combined");
+    }
+
+    validator.depth++;
+    int cur;
+    for (cur = 0; cur < used; ++cur) {
+        if (cur > 0 && node->keys[cur - 1] >= node->keys[cur]) {
+            return art_validate_fail(
+                &validator, "Node4 keys are not strictly increasing");
+        }
+        int other;
+        for (other = cur + 1; other < used; ++other) {
+            if (node->children[cur] == node->children[other]) {
+                return art_validate_fail(&validator,
+                                         "Node4 has duplicate children");
+            }
+        }
+        // Record the key chunk taken at this level before descending.
+        validator.current_key[validator.depth - 1] = node->keys[cur];
+        if (!art_internal_validate_at(node->children[cur], validator)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Allocates an empty Node16 carrying the given prefix.
+static art_node16_t *art_node16_create(const art_key_chunk_t prefix[],
+                                       uint8_t prefix_size) {
+    art_node16_t *fresh = (art_node16_t *)roaring_malloc(sizeof(*fresh));
+    art_init_inner_node(&fresh->base, ART_NODE16_TYPE, prefix, prefix_size);
+    fresh->count = 0;
+    return fresh;
+}
+
+// Frees every child subtree of a Node16, then the node itself.
+static void art_free_node16(art_node16_t *node) {
+    art_node_t **child = node->children;
+    size_t remaining = node->count;
+    while (remaining-- > 0) {
+        art_free_node(*child++);
+    }
+    roaring_free(node);
+}
+
+// Linear search for `key` among the used entries; NULL when absent.
+static inline art_node_t *art_node16_find_child(const art_node16_t *node,
+                                                art_key_chunk_t key) {
+    const size_t used = node->count;
+    size_t pos = 0;
+    while (pos < used && node->keys[pos] != key) {
+        ++pos;
+    }
+    return (pos < used) ? node->children[pos] : NULL;
+}
+
+// Inserts `child` under `key`, keeping keys sorted. A full Node16 is replaced
+// by a Node48 holding the same entries plus the new one. Returns the node
+// that now holds the entries.
+static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child,
+                                     uint8_t key) {
+    if (node->count >= 16) {
+        // No room left: move all sixteen entries into a Node48.
+        art_node48_t *grown =
+            art_node48_create(node->base.prefix, node->base.prefix_size);
+        size_t src;
+        for (src = 0; src < 16; ++src) {
+            art_node48_insert(grown, node->children[src], node->keys[src]);
+        }
+        roaring_free(node);
+        return art_node48_insert(grown, child, key);
+    }
+
+    // First slot whose key is strictly greater than the new key.
+    size_t slot = 0;
+    while (slot < node->count && node->keys[slot] <= key) {
+        ++slot;
+    }
+
+    // Open a gap at `slot` by shifting the tail one position to the right.
+    const size_t tail = node->count - slot;
+    memmove(&node->keys[slot + 1], &node->keys[slot],
+            tail * sizeof(art_key_chunk_t));
+    memmove(&node->children[slot + 1], &node->children[slot],
+            tail * sizeof(art_node_t *));
+
+    node->keys[slot] = key;
+    node->children[slot] = child;
+    node->count++;
+    return (art_node_t *)node;
+}
+
+// Removes the entry stored under `key_chunk`, if any. Once four or fewer
+// children remain, the entries are moved into a new Node4 and this node is
+// freed. Returns the node that now holds the entries.
+static inline art_node_t *art_node16_erase(art_node16_t *node,
+                                           uint8_t key_chunk) {
+    size_t pos = 0;
+    while (pos < node->count && node->keys[pos] != key_chunk) {
+        ++pos;
+    }
+    if (pos < node->count) {
+        // Close the gap left by the erased entry; order stays sorted.
+        const size_t tail = node->count - pos - 1;
+        memmove(&node->keys[pos], &node->keys[pos + 1],
+                tail * sizeof(key_chunk));
+        memmove(&node->children[pos], &node->children[pos + 1],
+                tail * sizeof(art_node_t *));
+        node->count--;
+    }
+    if (node->count > 4) {
+        return (art_node_t *)node;
+    }
+
+    // Shrink: copy the first four entries into a Node4.
+    art_node4_t *shrunk =
+        art_node4_create(node->base.prefix, node->base.prefix_size);
+    size_t src;
+    for (src = 0; src < 4; ++src) {
+        art_node4_insert(shrunk, node->children[src], node->keys[src]);
+    }
+    roaring_free(node);
+    return (art_node_t *)shrunk;
+}
+
+// Points the entry stored under `key_chunk` at `new_child`; does nothing when
+// the key chunk is not present.
+static inline void art_node16_replace(art_node16_t *node,
+                                      art_key_chunk_t key_chunk,
+                                      art_node_t *new_child) {
+    uint8_t pos = 0;
+    while (pos < node->count) {
+        if (node->keys[pos] == key_chunk) {
+            node->children[pos] = new_child;
+            break;
+        }
+        ++pos;
+    }
+}
+
+// Returns the entry at position `index + 1`, or a result whose child is NULL
+// when that position is past the last used entry.
+static inline art_indexed_child_t art_node16_next_child(
+    const art_node16_t *node, int index) {
+    art_indexed_child_t result;
+    const int next = index + 1;
+    if (next < node->count) {
+        result.index = next;
+        result.child = node->children[next];
+        result.key_chunk = node->keys[next];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the entry just before position `index` (clamped to the number of
+// used entries), or a result whose child is NULL when there is none.
+static inline art_indexed_child_t art_node16_prev_child(
+    const art_node16_t *node, int index) {
+    art_indexed_child_t result;
+    const int limit = (index > node->count) ? node->count : index;
+    const int prev = limit - 1;
+    if (prev >= 0) {
+        result.index = prev;
+        result.child = node->children[prev];
+        result.key_chunk = node->keys[prev];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the entry at position `index`; the child is NULL when `index` is
+// outside [0, count).
+static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node,
+                                                      int index) {
+    art_indexed_child_t result;
+    const bool in_range = (index >= 0) && (index < node->count);
+    if (in_range) {
+        result.index = index;
+        result.child = node->children[index];
+        result.key_chunk = node->keys[index];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the first entry whose key is >= `key_chunk`; the child is NULL when
+// every stored key is smaller.
+static inline art_indexed_child_t art_node16_lower_bound(
+    art_node16_t *node, art_key_chunk_t key_chunk) {
+    art_indexed_child_t result;
+    size_t pos = 0;
+    while (pos < node->count && node->keys[pos] < key_chunk) {
+        ++pos;
+    }
+    if (pos < node->count) {
+        result.index = pos;
+        result.child = node->children[pos];
+        result.key_chunk = node->keys[pos];
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Checks Node16 invariants (5..16 children, strictly increasing keys, no
+// child pointer stored twice) and recursively validates each child.
+// `validator` is a by-value copy, so its changes stay local to this subtree.
+static bool art_node16_internal_validate(const art_node16_t *node,
+                                         art_internal_validate_t validator) {
+    const int used = node->count;
+    if (used <= 4) {
+        return art_validate_fail(&validator, "Node16 has too few children");
+    }
+    if (used > 16) {
+        return art_validate_fail(&validator, "Node16 has too many children");
+    }
+
+    validator.depth++;
+    int cur;
+    for (cur = 0; cur < used; ++cur) {
+        if (cur > 0 && node->keys[cur - 1] >= node->keys[cur]) {
+            return art_validate_fail(
+                &validator, "Node16 keys are not strictly increasing");
+        }
+        int other;
+        for (other = cur + 1; other < used; ++other) {
+            if (node->children[cur] == node->children[other]) {
+                return art_validate_fail(&validator,
+                                         "Node16 has duplicate children");
+            }
+        }
+        // Record the key chunk taken at this level before descending.
+        validator.current_key[validator.depth - 1] = node->keys[cur];
+        if (!art_internal_validate_at(node->children[cur], validator)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Allocates an empty Node48: every child slot is marked available and every
+// key chunk maps to ART_NODE48_EMPTY_VAL.
+static art_node48_t *art_node48_create(const art_key_chunk_t prefix[],
+                                       uint8_t prefix_size) {
+    art_node48_t *fresh = (art_node48_t *)roaring_malloc(sizeof(*fresh));
+    art_init_inner_node(&fresh->base, ART_NODE48_TYPE, prefix, prefix_size);
+    fresh->count = 0;
+    fresh->available_children = NODE48_AVAILABLE_CHILDREN_MASK;
+    // keys[] is a byte array, so a byte-wise fill sets all 256 entries.
+    memset(fresh->keys, ART_NODE48_EMPTY_VAL, sizeof(fresh->keys));
+    return fresh;
+}
+
+// Frees every occupied child slot of a Node48, then the node itself.
+static void art_free_node48(art_node48_t *node) {
+    // Occupied slots are the complement of the available ones within 48 bits.
+    uint64_t pending =
+        node->available_children ^ NODE48_AVAILABLE_CHILDREN_MASK;
+    for (; pending != 0; pending &= pending - 1) {
+        // pending is non-zero here, so the trailing-zero count is defined.
+        const uint8_t slot = roaring_trailing_zeroes(pending);
+        art_free_node(node->children[slot]);
+    }
+    roaring_free(node);
+}
+
+// Looks up the child slot recorded for `key`; NULL when the key is unused.
+static inline art_node_t *art_node48_find_child(const art_node48_t *node,
+                                                art_key_chunk_t key) {
+    const uint8_t slot = node->keys[key];
+    return (slot == ART_NODE48_EMPTY_VAL) ? NULL : node->children[slot];
+}
+
+// Stores `child` under `key` in the lowest available slot. A full Node48 is
+// replaced by a Node256 holding the same entries plus the new one. Returns
+// the node that now holds the entries.
+static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child,
+                                     uint8_t key) {
+    if (node->count >= 48) {
+        // Full: move every mapped key chunk into a Node256.
+        art_node256_t *grown =
+            art_node256_create(node->base.prefix, node->base.prefix_size);
+        size_t chunk;
+        for (chunk = 0; chunk < 256; ++chunk) {
+            const uint8_t slot = node->keys[chunk];
+            if (slot == ART_NODE48_EMPTY_VAL) {
+                continue;
+            }
+            art_node256_insert(grown, node->children[slot], chunk);
+        }
+        roaring_free(node);
+        return art_node256_insert(grown, child, key);
+    }
+
+    // count < 48 implies at least one available bit, so this is well defined.
+    const uint8_t free_slot =
+        roaring_trailing_zeroes(node->available_children);
+    node->children[free_slot] = child;
+    node->keys[key] = free_slot;
+    node->available_children &= ~(UINT64_C(1) << free_slot);
+    node->count++;
+    return (art_node_t *)node;
+}
+
+// Unmaps `key_chunk` and marks its child slot available again. Once sixteen
+// or fewer children remain, the entries are moved into a new Node16 and this
+// node is freed. Returns the node that now holds the entries.
+static inline art_node_t *art_node48_erase(art_node48_t *node,
+                                           uint8_t key_chunk) {
+    const uint8_t erased_slot = node->keys[key_chunk];
+    if (erased_slot == ART_NODE48_EMPTY_VAL) {
+        // Nothing is stored under this key chunk.
+        return (art_node_t *)node;
+    }
+    node->available_children |= UINT64_C(1) << erased_slot;
+    node->keys[key_chunk] = ART_NODE48_EMPTY_VAL;
+    node->count--;
+    if (node->count > 16) {
+        return (art_node_t *)node;
+    }
+
+    // Shrink: visiting key chunks in ascending order yields sorted inserts.
+    art_node16_t *shrunk =
+        art_node16_create(node->base.prefix, node->base.prefix_size);
+    size_t chunk;
+    for (chunk = 0; chunk < 256; ++chunk) {
+        const uint8_t slot = node->keys[chunk];
+        if (slot == ART_NODE48_EMPTY_VAL) {
+            continue;
+        }
+        art_node16_insert(shrunk, node->children[slot], chunk);
+    }
+    roaring_free(node);
+    return (art_node_t *)shrunk;
+}
+
+// Points the slot mapped from `key_chunk` at `new_child`. The key chunk must
+// already be mapped (asserted).
+static inline void art_node48_replace(art_node48_t *node,
+                                      art_key_chunk_t key_chunk,
+                                      art_node_t *new_child) {
+    const uint8_t slot = node->keys[key_chunk];
+    assert(slot != ART_NODE48_EMPTY_VAL);
+    node->children[slot] = new_child;
+}
+
+// Returns the first mapped key chunk strictly greater than `index`; the
+// child is NULL when none exists. Here `index` is a key chunk, not a slot.
+static inline art_indexed_child_t art_node48_next_child(
+    const art_node48_t *node, int index) {
+    art_indexed_child_t result;
+    size_t chunk = index + 1;
+    while (chunk < 256) {
+        const uint8_t slot = node->keys[chunk];
+        if (slot != ART_NODE48_EMPTY_VAL) {
+            result.index = chunk;
+            result.child = node->children[slot];
+            result.key_chunk = chunk;
+            return result;
+        }
+        ++chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Returns the last mapped key chunk strictly below `index` (clamped to 256);
+// the child is NULL when none exists.
+static inline art_indexed_child_t art_node48_prev_child(
+    const art_node48_t *node, int index) {
+    art_indexed_child_t result;
+    int chunk = ((index > 256) ? 256 : index) - 1;
+    while (chunk >= 0) {
+        const uint8_t slot = node->keys[chunk];
+        if (slot != ART_NODE48_EMPTY_VAL) {
+            result.index = chunk;
+            result.child = node->children[slot];
+            result.key_chunk = chunk;
+            return result;
+        }
+        --chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Returns the child mapped from key chunk `index`. The child is NULL when
+// `index` is outside [0, 256) or when no child is mapped from that key chunk.
+//
+// The unmapped case is checked explicitly: ART_NODE48_EMPTY_VAL (48) used as
+// a slot number would read children[48], one past the end of the array.
+static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node,
+                                                      int index) {
+    art_indexed_child_t indexed_child;
+    if (index < 0 || index >= 256) {
+        indexed_child.child = NULL;
+        return indexed_child;
+    }
+    const uint8_t val_idx = node->keys[index];
+    if (val_idx == ART_NODE48_EMPTY_VAL) {
+        indexed_child.child = NULL;
+        return indexed_child;
+    }
+    indexed_child.index = index;
+    indexed_child.child = node->children[val_idx];
+    indexed_child.key_chunk = index;
+    return indexed_child;
+}
+
+// Returns the first mapped key chunk >= `key_chunk`; the child is NULL when
+// none exists.
+static inline art_indexed_child_t art_node48_lower_bound(
+    art_node48_t *node, art_key_chunk_t key_chunk) {
+    art_indexed_child_t result;
+    size_t chunk = key_chunk;
+    while (chunk < 256) {
+        const uint8_t slot = node->keys[chunk];
+        if (slot != ART_NODE48_EMPTY_VAL) {
+            result.index = chunk;
+            result.child = node->children[slot];
+            result.key_chunk = chunk;
+            return result;
+        }
+        ++chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Checks Node48 invariants and recursively validates each child:
+//   * 17..48 children;
+//   * every mapped key chunk refers to a slot in [0, 48), no slot is referred
+//     to twice, and no mapped slot holds NULL;
+//   * the set of mapped slots is the complement of available_children;
+//   * no child pointer is stored in two slots.
+// `validator` is a by-value copy, so its changes stay local to this subtree.
+static bool art_node48_internal_validate(const art_node48_t *node,
+                                         art_internal_validate_t validator) {
+    if (node->count <= 16) {
+        return art_validate_fail(&validator, "Node48 has too few children");
+    }
+    if (node->count > 48) {
+        return art_validate_fail(&validator, "Node48 has too many children");
+    }
+    uint64_t used_children = 0;
+    int i;
+    for (i = 0; i < 256; ++i) {
+        uint8_t child_idx = node->keys[i];
+        if (child_idx != ART_NODE48_EMPTY_VAL) {
+            // A corrupt slot number must be rejected before it is used:
+            // it would index children[] out of bounds, and shifting a
+            // 64-bit value by 64 or more is undefined behaviour.
+            if (child_idx > ART_NODE48_EMPTY_VAL) {
+                return art_validate_fail(
+                    &validator, "Node48 keys point to an invalid child index");
+            }
+            if (used_children & (UINT64_C(1) << child_idx)) {
+                return art_validate_fail(
+                    &validator, "Node48 keys point to the same child index");
+            }
+
+            art_node_t *child = node->children[child_idx];
+            if (child == NULL) {
+                return art_validate_fail(&validator, "Node48 has a NULL child");
+            }
+            used_children |= UINT64_C(1) << child_idx;
+        }
+    }
+    uint64_t expected_used_children =
+        (node->available_children) ^ NODE48_AVAILABLE_CHILDREN_MASK;
+    if (used_children != expected_used_children) {
+        return art_validate_fail(
+            &validator,
+            "Node48 available_children does not match actual children");
+    }
+    // Pairwise comparison of occupied slots; each step clears the lowest bit.
+    while (used_children != 0) {
+        uint8_t child_idx = roaring_trailing_zeroes(used_children);
+        used_children &= used_children - 1;
+
+        uint64_t other_children = used_children;
+        while (other_children != 0) {
+            uint8_t other_child_idx = roaring_trailing_zeroes(other_children);
+            if (node->children[child_idx] == node->children[other_child_idx]) {
+                return art_validate_fail(&validator,
+                                         "Node48 has duplicate children");
+            }
+            other_children &= other_children - 1;
+        }
+    }
+
+    validator.depth++;
+    for (i = 0; i < 256; ++i) {
+        if (node->keys[i] != ART_NODE48_EMPTY_VAL) {
+            // Record the key chunk taken at this level before descending.
+            validator.current_key[validator.depth - 1] = i;
+            if (!art_internal_validate_at(node->children[node->keys[i]],
+                                          validator)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Allocates an empty Node256: all 256 child pointers are NULL.
+static art_node256_t *art_node256_create(const art_key_chunk_t prefix[],
+                                         uint8_t prefix_size) {
+    art_node256_t *fresh = (art_node256_t *)roaring_malloc(sizeof(*fresh));
+    art_init_inner_node(&fresh->base, ART_NODE256_TYPE, prefix, prefix_size);
+    fresh->count = 0;
+    size_t chunk = 0;
+    while (chunk < 256) {
+        fresh->children[chunk++] = NULL;
+    }
+    return fresh;
+}
+
+// Frees every non-NULL child subtree of a Node256, then the node itself.
+static void art_free_node256(art_node256_t *node) {
+    size_t chunk;
+    for (chunk = 0; chunk < 256; ++chunk) {
+        art_node_t *child = node->children[chunk];
+        if (child == NULL) {
+            continue;
+        }
+        art_free_node(child);
+    }
+    roaring_free(node);
+}
+
+// Direct lookup by key chunk; the stored pointer is NULL when absent.
+static inline art_node_t *art_node256_find_child(const art_node256_t *node,
+                                                 art_key_chunk_t key) {
+    art_node_t *found = node->children[key];
+    return found;
+}
+
+// Stores `child` under `key` and bumps the child count. Returns `node`.
+static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child,
+                                      uint8_t key) {
+    art_node_t **slot = &node->children[key];
+    *slot = child;
+    node->count++;
+    return (art_node_t *)node;
+}
+
+// Removes the child stored under `key_chunk`, if any. Once 48 or fewer
+// children remain, the entries are moved into a new Node48 and this node is
+// freed. Returns the node that now holds the entries.
+//
+// An absent key chunk leaves the node untouched, matching art_node48_erase.
+// Without this guard `count` would be decremented for a child that was never
+// stored, corrupting the count and possibly triggering a spurious shrink.
+static inline art_node_t *art_node256_erase(art_node256_t *node,
+                                            uint8_t key_chunk) {
+    if (node->children[key_chunk] == NULL) {
+        return (art_node_t *)node;
+    }
+    node->children[key_chunk] = NULL;
+    node->count--;
+    if (node->count > 48) {
+        return (art_node_t *)node;
+    }
+
+    art_node48_t *new_node =
+        art_node48_create(node->base.prefix, node->base.prefix_size);
+    size_t i;
+    for (i = 0; i < 256; ++i) {
+        if (node->children[i] != NULL) {
+            art_node48_insert(new_node, node->children[i], i);
+        }
+    }
+    roaring_free(node);
+    return (art_node_t *)new_node;
+}
+
+// Overwrites the child pointer stored under `key_chunk`.
+static inline void art_node256_replace(art_node256_t *node,
+                                       art_key_chunk_t key_chunk,
+                                       art_node_t *new_child) {
+    art_node_t **slot = &node->children[key_chunk];
+    *slot = new_child;
+}
+
+// Returns the first present child at a key chunk strictly greater than
+// `index`; the child is NULL when none exists.
+static inline art_indexed_child_t art_node256_next_child(
+    const art_node256_t *node, int index) {
+    art_indexed_child_t result;
+    size_t chunk = index + 1;
+    while (chunk < 256) {
+        if (node->children[chunk] != NULL) {
+            result.index = chunk;
+            result.child = node->children[chunk];
+            result.key_chunk = chunk;
+            return result;
+        }
+        ++chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Returns the last present child at a key chunk strictly below `index`
+// (clamped to 256); the child is NULL when none exists.
+static inline art_indexed_child_t art_node256_prev_child(
+    const art_node256_t *node, int index) {
+    art_indexed_child_t result;
+    int chunk = ((index > 256) ? 256 : index) - 1;
+    while (chunk >= 0) {
+        if (node->children[chunk] != NULL) {
+            result.index = chunk;
+            result.child = node->children[chunk];
+            result.key_chunk = chunk;
+            return result;
+        }
+        --chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Returns the child stored at key chunk `index` (possibly NULL); the child is
+// also NULL, with the other fields unset, when `index` is outside [0, 256).
+static inline art_indexed_child_t art_node256_child_at(
+    const art_node256_t *node, int index) {
+    art_indexed_child_t result;
+    const bool in_range = (index >= 0) && (index < 256);
+    if (in_range) {
+        result.index = index;
+        result.child = node->children[index];
+        result.key_chunk = index;
+    } else {
+        result.child = NULL;
+    }
+    return result;
+}
+
+// Returns the first present child at a key chunk >= `key_chunk`; the child
+// is NULL when none exists.
+static inline art_indexed_child_t art_node256_lower_bound(
+    art_node256_t *node, art_key_chunk_t key_chunk) {
+    art_indexed_child_t result;
+    size_t chunk = key_chunk;
+    while (chunk < 256) {
+        if (node->children[chunk] != NULL) {
+            result.index = chunk;
+            result.child = node->children[chunk];
+            result.key_chunk = chunk;
+            return result;
+        }
+        ++chunk;
+    }
+    result.child = NULL;
+    return result;
+}
+
+// Checks Node256 invariants (49..256 children, no child pointer stored
+// twice, `count` equal to the number of non-NULL children) and recursively
+// validates each child. `validator` is a by-value copy, so its changes stay
+// local to this subtree.
+static bool art_node256_internal_validate(const art_node256_t *node,
+                                          art_internal_validate_t validator) {
+    if (node->count <= 48) {
+        return art_validate_fail(&validator, "Node256 has too few children");
+    }
+    if (node->count > 256) {
+        return art_validate_fail(&validator, "Node256 has too many children");
+    }
+
+    validator.depth++;
+    int present = 0;
+    int chunk;
+    for (chunk = 0; chunk < 256; ++chunk) {
+        const art_node_t *child = node->children[chunk];
+        if (child == NULL) {
+            continue;
+        }
+        ++present;
+
+        int later;
+        for (later = chunk + 1; later < 256; ++later) {
+            if (child == node->children[later]) {
+                return art_validate_fail(&validator,
+                                         "Node256 has duplicate children");
+            }
+        }
+
+        // Record the key chunk taken at this level before descending.
+        validator.current_key[validator.depth - 1] = chunk;
+        if (!art_internal_validate_at(child, validator)) {
+            return false;
+        }
+    }
+    if (present != node->count) {
+        return art_validate_fail(
+            &validator, "Node256 count does not match actual children");
+    }
+    return true;
+}
+
+// Looks up the child stored under `key_chunk` in an inner node of any type.
+// Returns NULL when there is no such child.
+static art_node_t *art_find_child(const art_inner_node_t *node,
+                                  art_key_chunk_t key_chunk) {
+    const uint8_t type = art_get_type(node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_find_child((art_node4_t *)node, key_chunk);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_find_child((art_node16_t *)node, key_chunk);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_find_child((art_node48_t *)node, key_chunk);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_find_child((art_node256_t *)node, key_chunk);
+    }
+    // Unknown typecode.
+    assert(false);
+    return NULL;
+}
+
+// Swaps in `new_child` for the child stored under `key_chunk` in an inner
+// node of any type.
+static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk,
+                        art_node_t *new_child) {
+    const uint8_t type = art_get_type(node);
+    if (type == ART_NODE4_TYPE) {
+        art_node4_replace((art_node4_t *)node, key_chunk, new_child);
+    } else if (type == ART_NODE16_TYPE) {
+        art_node16_replace((art_node16_t *)node, key_chunk, new_child);
+    } else if (type == ART_NODE48_TYPE) {
+        art_node48_replace((art_node48_t *)node, key_chunk, new_child);
+    } else if (type == ART_NODE256_TYPE) {
+        art_node256_replace((art_node256_t *)node, key_chunk, new_child);
+    } else {
+        // Unknown typecode.
+        assert(false);
+    }
+}
+
+// Removes the child stored under `key_chunk` from an inner node of any type.
+// Returns the node that holds the remaining entries, which is `node` itself
+// unless the erase caused it to be replaced by a smaller node.
+static art_node_t *art_node_erase(art_inner_node_t *node,
+                                  art_key_chunk_t key_chunk) {
+    const uint8_t type = art_get_type(node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_erase((art_node4_t *)node, key_chunk);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_erase((art_node16_t *)node, key_chunk);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_erase((art_node48_t *)node, key_chunk);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_erase((art_node256_t *)node, key_chunk);
+    }
+    // Unknown typecode.
+    assert(false);
+    return NULL;
+}
+
+// Tags `leaf` as a leaf pointer and inserts it under `key_chunk` in an inner
+// node of any type. Returns the node that now holds the entries, which may be
+// a larger replacement of `node`.
+static art_node_t *art_node_insert_leaf(art_inner_node_t *node,
+                                        art_key_chunk_t key_chunk,
+                                        art_leaf_t *leaf) {
+    art_node_t *tagged = (art_node_t *)(SET_LEAF(leaf));
+    const uint8_t type = art_get_type(node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_insert((art_node4_t *)node, tagged, key_chunk);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_insert((art_node16_t *)node, tagged, key_chunk);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_insert((art_node48_t *)node, tagged, key_chunk);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_insert((art_node256_t *)node, tagged, key_chunk);
+    }
+    // Unknown typecode.
+    assert(false);
+    return NULL;
+}
+
+// Recursively frees an inner node and all inner nodes below it. Leaves are
+// skipped: releasing them is the user's responsibility.
+static void art_free_node(art_node_t *node) {
+    if (art_is_leaf(node)) {
+        return;
+    }
+    const uint8_t type = art_get_type((art_inner_node_t *)node);
+    if (type == ART_NODE4_TYPE) {
+        art_free_node4((art_node4_t *)node);
+    } else if (type == ART_NODE16_TYPE) {
+        art_free_node16((art_node16_t *)node);
+    } else if (type == ART_NODE48_TYPE) {
+        art_free_node48((art_node48_t *)node);
+    } else if (type == ART_NODE256_TYPE) {
+        art_free_node256((art_node256_t *)node);
+    } else {
+        // Unknown typecode.
+        assert(false);
+    }
+}
+
+// Returns the child that follows `index` in key order; the child is NULL
+// when called on a leaf or when nothing follows.
+// Provided index may be in the range [-1, 255].
+static art_indexed_child_t art_node_next_child(const art_node_t *node,
+                                               int index) {
+    if (art_is_leaf(node)) {
+        art_indexed_child_t none;
+        none.child = NULL;
+        return none;
+    }
+    const uint8_t type = art_get_type((art_inner_node_t *)node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_next_child((art_node4_t *)node, index);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_next_child((art_node16_t *)node, index);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_next_child((art_node48_t *)node, index);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_next_child((art_node256_t *)node, index);
+    }
+    // Unknown typecode.
+    assert(false);
+    return (art_indexed_child_t){0};
+}
+
+// Returns the child that precedes `index` in key order; the child is NULL
+// when called on a leaf or when nothing precedes.
+// Provided index may be in the range [0, 256].
+static art_indexed_child_t art_node_prev_child(const art_node_t *node,
+                                               int index) {
+    if (art_is_leaf(node)) {
+        art_indexed_child_t none;
+        none.child = NULL;
+        return none;
+    }
+    const uint8_t type = art_get_type((art_inner_node_t *)node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_prev_child((art_node4_t *)node, index);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_prev_child((art_node16_t *)node, index);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_prev_child((art_node48_t *)node, index);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_prev_child((art_node256_t *)node, index);
+    }
+    // Unknown typecode.
+    assert(false);
+    return (art_indexed_child_t){0};
+}
+
+// Returns the child located at `index`; the child is NULL when called on a
+// leaf. Provided index is only valid if returned by
+// art_node_(next|prev)_child.
+static art_indexed_child_t art_node_child_at(const art_node_t *node,
+                                             int index) {
+    if (art_is_leaf(node)) {
+        art_indexed_child_t none;
+        none.child = NULL;
+        return none;
+    }
+    const uint8_t type = art_get_type((art_inner_node_t *)node);
+    if (type == ART_NODE4_TYPE) {
+        return art_node4_child_at((art_node4_t *)node, index);
+    }
+    if (type == ART_NODE16_TYPE) {
+        return art_node16_child_at((art_node16_t *)node, index);
+    }
+    if (type == ART_NODE48_TYPE) {
+        return art_node48_child_at((art_node48_t *)node, index);
+    }
+    if (type == ART_NODE256_TYPE) {
+        return art_node256_child_at((art_node256_t *)node, index);
+    }
+    // Unknown typecode.
+    assert(false);
+    return (art_indexed_child_t){0};
+}
+
+// Returns the child with the smallest key chunk that is equal to or greater
+// than `key_chunk`.
+//
+// The returned `child` is NULL when `node` is a leaf or when no such child
+// exists. In the leaf case the whole struct is zeroed.
+static art_indexed_child_t art_node_lower_bound(const art_node_t *node,
+                                                art_key_chunk_t key_chunk) {
+    if (art_is_leaf(node)) {
+        // Zero every member (not only `child`) so callers never observe
+        // indeterminate `index` / `key_chunk` values; this matches the
+        // fallback branch below.
+        return (art_indexed_child_t){0};
+    }
+    switch (art_get_type((art_inner_node_t *)node)) {
+        case ART_NODE4_TYPE:
+            return art_node4_lower_bound((art_node4_t *)node, key_chunk);
+        case ART_NODE16_TYPE:
+            return art_node16_lower_bound((art_node16_t *)node, key_chunk);
+        case ART_NODE48_TYPE:
+            return art_node48_lower_bound((art_node48_t *)node, key_chunk);
+        case ART_NODE256_TYPE:
+            return art_node256_lower_bound((art_node256_t *)node, key_chunk);
+        default:
+            // Unknown typecode: programming error.
+            assert(false);
+            return (art_indexed_child_t){0};
+    }
+}
+
+// ====================== End of node-specific functions =======================
+
+// Orders `length` chunks of key1 (starting at key1_from) against `length`
+// chunks of key2 (starting at key2_from) using memcmp. The result is:
+//  * negative when range 1 sorts before range 2,
+//  * zero when both ranges are equal,
+//  * positive when range 1 sorts after range 2.
+static inline int art_compare_prefix(const art_key_chunk_t key1[],
+                                     uint8_t key1_from,
+                                     const art_key_chunk_t key2[],
+                                     uint8_t key2_from, uint8_t length) {
+    const art_key_chunk_t *lhs = key1 + key1_from;
+    const art_key_chunk_t *rhs = key2 + key2_from;
+    return memcmp(lhs, rhs, length);
+}
+
+// Whole-key comparison: orders all ART_KEY_BYTES chunks of `key1` against
+// `key2`, with the sign convention of art_compare_prefix.
+int art_compare_keys(const art_key_chunk_t key1[],
+                     const art_key_chunk_t key2[]) {
+    const uint8_t start = 0;
+    return art_compare_prefix(key1, start, key2, start, ART_KEY_BYTES);
+}
+
+// Counts how many leading chunks the ranges key1[key1_from, key1_to) and
+// key2[key2_from, key2_to) have in common. The count never exceeds the length
+// of the shorter range.
+static uint8_t art_common_prefix(const art_key_chunk_t key1[],
+                                 uint8_t key1_from, uint8_t key1_to,
+                                 const art_key_chunk_t key2[],
+                                 uint8_t key2_from, uint8_t key2_to) {
+    uint8_t len1 = key1_to - key1_from;
+    uint8_t len2 = key2_to - key2_from;
+    uint8_t limit = (len2 < len1) ? len2 : len1;
+    uint8_t matched = 0;
+    while (matched < limit &&
+           key1[key1_from + matched] == key2[key2_from + matched]) {
+        ++matched;
+    }
+    return matched;
+}
+
+// Inserts `new_leaf` (carrying `key`) into the subtrie rooted at `node`.
+// `depth` is the number of key chunks already consumed above `node`.
+//
+// Returns the rootmost node of the subtrie after the insertion. It may not be
+// equal to `node`, in which case the caller has to store the returned pointer.
+static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[],
+                                 uint8_t depth, art_leaf_t *new_leaf) {
+    uint8_t shared;
+
+    if (art_is_leaf(node)) {
+        // A leaf lives here: put a Node4 in its place that carries the chunks
+        // both keys share from `depth` on, then attach both leaves to it.
+        art_leaf_t *old_leaf = CAST_LEAF(node);
+        shared = art_common_prefix(old_leaf->key, depth, ART_KEY_BYTES, key,
+                                   depth, ART_KEY_BYTES);
+
+        art_inner_node_t *branch =
+            (art_inner_node_t *)art_node4_create(key + depth, shared);
+        branch = (art_inner_node_t *)art_node_insert_leaf(
+            branch, old_leaf->key[depth + shared], old_leaf);
+
+        // Whatever the second insertion returns is the new rootmost node.
+        return art_node_insert_leaf(branch, key[depth + shared], new_leaf);
+    }
+
+    // Inner node: compare its compressed prefix with the key.
+    art_inner_node_t *inner = (art_inner_node_t *)node;
+    shared = art_common_prefix(inner->prefix, 0, inner->prefix_size, key,
+                               depth, ART_KEY_BYTES);
+
+    if (shared != inner->prefix_size) {
+        // Only part of the prefix matches. A new Node4 takes over the matching
+        // part and the existing node becomes one of its children.
+        art_node4_t *split = art_node4_create(inner->prefix, shared);
+        split = (art_node4_t *)art_node4_insert(split, node,
+                                                inner->prefix[shared]);
+
+        // The existing node loses the matching part plus the one chunk that
+        // now selects it inside `split`.
+        inner->prefix_size = inner->prefix_size - shared - 1;
+        if (inner->prefix_size > 0) {
+            // Slide the remaining prefix chunks to the front.
+            memmove(inner->prefix, inner->prefix + shared + 1,
+                    inner->prefix_size);
+        }
+
+        // Finally hang the new leaf off the split node.
+        return art_node_insert_leaf(&split->base, key[shared + depth],
+                                    new_leaf);
+    }
+
+    // The whole prefix matched (or there is none): descend by the next chunk.
+    art_key_chunk_t chunk = key[depth + shared];
+    art_node_t *child = art_find_child(inner, chunk);
+    if (child == NULL) {
+        return art_node_insert_leaf(inner, chunk, new_leaf);
+    }
+
+    art_node_t *updated = art_insert_at(child, key, depth + shared + 1,
+                                        new_leaf);
+    if (updated != child) {
+        // The child was replaced by a different node; relink it.
+        art_replace(inner, chunk, updated);
+    }
+    return node;
+}
+
+// Result of art_erase_at: reports both what was removed and which node now
+// roots the subtrie the erase was run on.
+typedef struct art_erase_result_s {
+    // The rootmost node where the value was erased, may not be equal to `node`.
+    // If no value was removed, this is null. It is also null when the node the
+    // erase was run on was the matching leaf itself.
+    art_node_t *rootmost_node;
+
+    // Value removed, null if not removed.
+    art_val_t *value_erased;
+} art_erase_result_t;
+
+// Looks for `key` in the subtrie rooted at `node` and unlinks its leaf when
+// found. `depth` is the number of key chunks already consumed above `node`.
+// See art_erase_result_t for the meaning of the returned members.
+static art_erase_result_t art_erase_at(art_node_t *node,
+                                       const art_key_chunk_t *key,
+                                       uint8_t depth) {
+    // "Nothing erased" outcome, handed back untouched on every miss below.
+    art_erase_result_t outcome;
+    outcome.value_erased = NULL;
+    outcome.rootmost_node = NULL;
+
+    if (art_is_leaf(node)) {
+        art_leaf_t *leaf = CAST_LEAF(node);
+        uint8_t matched = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key,
+                                            0, ART_KEY_BYTES);
+        if (matched == ART_KEY_BYTES) {
+            // Exact key match. rootmost_node stays NULL so the parent knows it
+            // has to drop this child.
+            outcome.value_erased = (art_val_t *)leaf;
+        }
+        return outcome;
+    }
+
+    art_inner_node_t *inner = (art_inner_node_t *)node;
+    uint8_t matched = art_common_prefix(inner->prefix, 0, inner->prefix_size,
+                                        key, depth, ART_KEY_BYTES);
+    if (matched != inner->prefix_size) {
+        // The key diverges inside this node's prefix.
+        return outcome;
+    }
+
+    art_key_chunk_t chunk = key[depth + matched];
+    art_node_t *child = art_find_child(inner, chunk);
+    if (child == NULL) {
+        // No child is registered under this key chunk.
+        return outcome;
+    }
+
+    // Recurse past the prefix and the chunk that selected the child.
+    art_erase_result_t below = art_erase_at(child, key, depth + matched + 1);
+    if (below.value_erased == NULL) {
+        return outcome;
+    }
+
+    outcome.value_erased = below.value_erased;
+    if (below.rootmost_node == NULL) {
+        // The child vanished completely; remove it from this node.
+        outcome.rootmost_node = art_node_erase(inner, chunk);
+    } else {
+        if (below.rootmost_node != child) {
+            // The child survived as a different node; relink it.
+            art_replace(inner, chunk, below.rootmost_node);
+        }
+        outcome.rootmost_node = node;
+    }
+    return outcome;
+}
+
+// Walks down from `node` following `key` and returns the matching value, or
+// NULL if the key is not present. `depth` is the number of key chunks already
+// consumed above `node`.
+static art_val_t *art_find_at(const art_node_t *node,
+                              const art_key_chunk_t *key, uint8_t depth) {
+    while (!art_is_leaf(node)) {
+        art_inner_node_t *inner = (art_inner_node_t *)node;
+        uint8_t matched =
+            art_common_prefix(inner->prefix, 0, inner->prefix_size, key, depth,
+                              ART_KEY_BYTES);
+        if (matched != inner->prefix_size) {
+            return NULL;
+        }
+        art_node_t *next =
+            art_find_child(inner, key[depth + inner->prefix_size]);
+        if (next == NULL) {
+            return NULL;
+        }
+        // Account for the prefix plus the chunk that selected the child.
+        depth += inner->prefix_size + 1;
+        node = next;
+    }
+
+    art_leaf_t *leaf = CAST_LEAF(node);
+    if (depth >= ART_KEY_BYTES) {
+        // Every chunk of the key was consumed on the way down.
+        return (art_val_t *)leaf;
+    }
+    // Otherwise the leaf's full key has to be checked against `key`.
+    uint8_t matched =
+        art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES);
+    return (matched == ART_KEY_BYTES) ? (art_val_t *)leaf : NULL;
+}
+
+// Sums the struct sizes of all inner nodes in the subtrie rooted at `node`.
+// Leaves contribute 0 bytes.
+size_t art_size_in_bytes_at(const art_node_t *node) {
+    size_t total = 0;
+    art_indexed_child_t cursor;
+
+    if (art_is_leaf(node)) {
+        return 0;
+    }
+    switch (art_get_type((art_inner_node_t *)node)) {
+        case ART_NODE4_TYPE:
+            total += sizeof(art_node4_t);
+            break;
+        case ART_NODE16_TYPE:
+            total += sizeof(art_node16_t);
+            break;
+        case ART_NODE48_TYPE:
+            total += sizeof(art_node48_t);
+            break;
+        case ART_NODE256_TYPE:
+            total += sizeof(art_node256_t);
+            break;
+        default:
+            assert(false);
+            break;
+    }
+    // Visit every child in key order, starting before the first one (-1).
+    for (cursor = art_node_next_child(node, -1); cursor.child != NULL;
+         cursor = art_node_next_child(node, cursor.index)) {
+        total += art_size_in_bytes_at(cursor.child);
+    }
+    return total;
+}
+
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+// Debug helper: prints the node kind ("Leaf", "Node4", "Node16", "Node48" or
+// "Node256") to stdout, without a trailing newline.
+static void art_node_print_type(const art_node_t *node) {
+    if (art_is_leaf(node)) {
+        printf("Leaf");
+        return;
+    }
+    switch (art_get_type((art_inner_node_t *)node)) {
+        case ART_NODE4_TYPE:
+            printf("Node4");
+            break;
+        case ART_NODE16_TYPE:
+            printf("Node16");
+            break;
+        case ART_NODE48_TYPE:
+            printf("Node48");
+            break;
+        case ART_NODE256_TYPE:
+            printf("Node256");
+            break;
+        default:
+            assert(false);
+            break;
+    }
+}
+
+
+// Debug helper: recursively dumps the subtrie rooted at `node` to stdout.
+// `depth` is the indentation width (in spaces) of the enclosing level.
+void art_node_printf(const art_node_t *node, uint8_t depth) {
+    if (art_is_leaf(node)) {
+        art_leaf_t *leaf = CAST_LEAF(node);
+        size_t pos;
+        printf("{ type: Leaf, key: ");
+        for (pos = 0; pos < ART_KEY_BYTES; ++pos) {
+            printf("%02x", leaf->key[pos]);
+        }
+        printf(" }\n");
+        return;
+    }
+
+    art_inner_node_t *inner = (art_inner_node_t *)node;
+    uint8_t p;
+
+    printf("{\n");
+    depth++;
+
+    // Node kind.
+    printf("%*s", depth, "");
+    printf("type: ");
+    art_node_print_type(node);
+    printf("\n");
+
+    // Compressed prefix: length, then the chunks in hex.
+    printf("%*s", depth, "");
+    printf("prefix_size: %d\n", inner->prefix_size);
+
+    printf("%*s", depth, "");
+    printf("prefix: ");
+    for (p = 0; p < inner->prefix_size; ++p) {
+        printf("%02x", inner->prefix[p]);
+    }
+    printf("\n");
+
+    // Children, in the storage order of the respective node layout.
+    switch (art_get_type(inner)) {
+        case ART_NODE4_TYPE: {
+            art_node4_t *n4 = (art_node4_t *)node;
+            uint8_t slot;
+            for (slot = 0; slot < n4->count; ++slot) {
+                printf("%*s", depth, "");
+                printf("key: %02x ", n4->keys[slot]);
+                art_node_printf(n4->children[slot], depth);
+            }
+        } break;
+        case ART_NODE16_TYPE: {
+            art_node16_t *n16 = (art_node16_t *)node;
+            uint8_t slot;
+            for (slot = 0; slot < n16->count; ++slot) {
+                printf("%*s", depth, "");
+                printf("key: %02x ", n16->keys[slot]);
+                art_node_printf(n16->children[slot], depth);
+            }
+        } break;
+        case ART_NODE48_TYPE: {
+            art_node48_t *n48 = (art_node48_t *)node;
+            int chunk;
+            for (chunk = 0; chunk < 256; ++chunk) {
+                if (n48->keys[chunk] == ART_NODE48_EMPTY_VAL) {
+                    continue;
+                }
+                printf("%*s", depth, "");
+                printf("key: %02x ", chunk);
+                printf("child: %02x ", n48->keys[chunk]);
+                art_node_printf(n48->children[n48->keys[chunk]], depth);
+            }
+        } break;
+        case ART_NODE256_TYPE: {
+            art_node256_t *n256 = (art_node256_t *)node;
+            int chunk;
+            for (chunk = 0; chunk < 256; ++chunk) {
+                if (n256->children[chunk] == NULL) {
+                    continue;
+                }
+                printf("%*s", depth, "");
+                printf("key: %02x ", chunk);
+                art_node_printf(n256->children[chunk], depth);
+            }
+        } break;
+        default:
+            assert(false);
+            break;
+    }
+
+    depth--;
+    printf("%*s", depth, "");
+    printf("}\n");
+}
+#endif
+
+// Inserts `val` under `key`. `val` is used as the leaf itself: it is cast to
+// art_leaf_t and populated with the key before being linked into the tree.
+void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) {
+    art_leaf_t *leaf = (art_leaf_t *)val;
+    art_leaf_populate(leaf, key);
+    // An empty tree gets the (tagged) leaf as its root; otherwise the regular
+    // recursive insertion decides what the new root is.
+    art->root = (art->root == NULL)
+                    ? (art_node_t *)SET_LEAF(leaf)
+                    : art_insert_at(art->root, key, 0, leaf);
+}
+
+// Removes `key` from the tree. Returns the removed value, or NULL when the
+// tree is empty or the key is not present.
+art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) {
+    if (art->root == NULL) {
+        return NULL;
+    }
+    art_erase_result_t outcome = art_erase_at(art->root, key, 0);
+    if (outcome.value_erased != NULL) {
+        // Only a successful erase may change the root.
+        art->root = outcome.rootmost_node;
+    }
+    return outcome.value_erased;
+}
+
+// Looks up `key`. Returns its value, or NULL when the tree is empty or the key
+// is not present.
+art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) {
+    return (art->root == NULL) ? NULL : art_find_at(art->root, key, 0);
+}
+
+// True when the tree has no root node.
+bool art_is_empty(const art_t *art) {
+    if (art->root != NULL) {
+        return false;
+    }
+    return true;
+}
+
+// Releases the nodes of the tree via art_free_node and leaves `art` in the
+// empty state. Calling it on an already empty tree is a no-op.
+void art_free(art_t *art) {
+    if (art->root == NULL) {
+        return;
+    }
+    art_free_node(art->root);
+    // Drop the now-dangling pointer so art_is_empty() reports true and a
+    // repeated art_free() does not free the same nodes twice.
+    art->root = NULL;
+}
+
+// Size of the art_t struct itself plus everything art_size_in_bytes_at counts
+// for the root subtrie (if there is one).
+size_t art_size_in_bytes(const art_t *art) {
+    size_t subtrie = (art->root != NULL) ? art_size_in_bytes_at(art->root) : 0;
+    return sizeof(art_t) + subtrie;
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+// Debug helper: dumps the whole tree to stdout; prints nothing when empty.
+void art_printf(const art_t *art) {
+    if (art->root != NULL) {
+        art_node_printf(art->root, 0);
+    }
+}
+#endif
+
+
+// Node stored in the iterator's current frame, i.e. where it is positioned.
+static inline art_node_t *art_iterator_node(art_iterator_t *iterator) {
+    art_iterator_frame_t *current = &iterator->frames[iterator->frame];
+    return current->node;
+}
+
+// Positions the iterator on `leaf`: the current frame points at the (tagged)
+// leaf and the iterator's key and value mirror the leaf's. Always returns true
+// so callers can `return art_iterator_valid_loc(...)`.
+static inline bool art_iterator_valid_loc(art_iterator_t *iterator,
+                                          art_leaf_t *leaf) {
+    art_iterator_frame_t *slot = &iterator->frames[iterator->frame];
+    slot->node = SET_LEAF(leaf);
+    slot->index_in_node = 0;
+    iterator->value = (art_val_t *)leaf;
+    memcpy(iterator->key, leaf->key, ART_KEY_BYTES);
+    return true;
+}
+
+// Marks the iterator as not pointing at any entry: value becomes NULL and the
+// key is zeroed. Always returns false so callers can
+// `return art_iterator_invalid_loc(...)`.
+static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) {
+    iterator->value = NULL;
+    memset(iterator->key, 0, ART_KEY_BYTES);
+    return false;
+}
+
+// Descends one level: records `node` and `index_in_node` in the current frame,
+// then opens a new frame for the child found at that index and advances the
+// iterator depth past the node's prefix and the selecting key chunk.
+//
+// Note: the index of the new frame is left untouched.
+static void art_iterator_down(art_iterator_t *iterator,
+                              const art_inner_node_t *node,
+                              uint8_t index_in_node) {
+    art_indexed_child_t target =
+        art_node_child_at((art_node_t *)node, index_in_node);
+    art_iterator_frame_t *parent_frame = &iterator->frames[iterator->frame];
+
+    parent_frame->node = (art_node_t *)node;
+    parent_frame->index_in_node = index_in_node;
+
+    iterator->frame++;
+    assert(target.child != NULL);
+    iterator->frames[iterator->frame].node = target.child;
+    iterator->depth += node->prefix_size + 1;
+}
+
+// Steps to the child next to (forward) or before (!forward) the one recorded
+// in the current frame and descends into it. Returns that child, or NULL --
+// leaving the iterator untouched -- when there is no such neighbor.
+static art_node_t *art_iterator_neighbor_child(
+    art_iterator_t *iterator, const art_inner_node_t *inner_node,
+    bool forward) {
+    const art_iterator_frame_t *current = &iterator->frames[iterator->frame];
+    art_indexed_child_t neighbor =
+        forward ? art_node_next_child(current->node, current->index_in_node)
+                : art_node_prev_child(current->node, current->index_in_node);
+    if (neighbor.child == NULL) {
+        return NULL;
+    }
+    art_iterator_down(iterator, inner_node, neighbor.index);
+    return neighbor.child;
+}
+
+// Ascends one level. Returns false (and changes nothing) when the iterator is
+// already at frame 0.
+static bool art_iterator_up(art_iterator_t *iterator) {
+    if (iterator->frame == 0) {
+        return false;
+    }
+    iterator->frame--;
+    // The frame we arrived at necessarily holds an inner node; undo the depth
+    // that descending through it had added.
+    art_inner_node_t *parent = (art_inner_node_t *)art_iterator_node(iterator);
+    iterator->depth -= parent->prefix_size + 1;
+    return true;
+}
+
+// Moves the iterator one level up and then on to the next / previous leaf.
+// If there is no level above, the iterator is invalidated. Returns the
+// resulting iterator status.
+static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) {
+    if (art_iterator_up(iterator)) {
+        return art_iterator_move(iterator, forward);
+    }
+    // Already at the root: nothing left in that direction.
+    return art_iterator_invalid_loc(iterator);
+}
+
+// Descends from `node` to its first (first == true) or last leaf, pushing a
+// frame per level, and positions the iterator on that leaf. Returns true for
+// convenience.
+static bool art_node_init_iterator(const art_node_t *node,
+                                   art_iterator_t *iterator, bool first) {
+    while (!art_is_leaf(node)) {
+        // -1 / 256 lie just outside the valid index range, so next / prev
+        // yield the first / last child.
+        art_indexed_child_t edge = first ? art_node_next_child(node, -1)
+                                         : art_node_prev_child(node, 256);
+        art_iterator_down(iterator, (art_inner_node_t *)node, edge.index);
+        node = edge.child;
+    }
+    // Reached a leaf.
+    art_iterator_frame_t *leaf_frame = &iterator->frames[iterator->frame];
+    leaf_frame->node = (art_node_t *)node;
+    leaf_frame->index_in_node = 0;  // Should not matter.
+    return art_iterator_valid_loc(iterator, CAST_LEAF(node));
+}
+
+// Advances the iterator to the next (forward) or previous leaf. Returns true
+// if it now points at a leaf, false if it ran off the end of the tree.
+bool art_iterator_move(art_iterator_t *iterator, bool forward) {
+    if (art_is_leaf(art_iterator_node(iterator)) &&
+        !art_iterator_up(iterator)) {
+        // The leaf we stand on is the root, so there is nowhere to go.
+        return art_iterator_invalid_loc(iterator);
+    }
+    // Now at an inner node: try its neighboring child.
+    art_inner_node_t *inner = (art_inner_node_t *)art_iterator_node(iterator);
+    art_node_t *sibling = art_iterator_neighbor_child(iterator, inner, forward);
+    if (sibling == NULL) {
+        // This level is exhausted; continue one level up.
+        return art_iterator_up_and_move(iterator, forward);
+    }
+    // Descend to the first (forward) or last leaf below the sibling.
+    return art_node_init_iterator(sibling, iterator, forward);
+}
+
+// Positions the iterator on the first leaf below `node` whose key is equal to
+// or greater than `key`, moving on to later leaves when the subtrie holds only
+// smaller keys. Returns the resulting iterator status.
+//
+// Assumes the iterator is positioned at a node with an equal prefix path up to
+// the depth of the iterator.
+static bool art_node_iterator_lower_bound(const art_node_t *node,
+                                          art_iterator_t *iterator,
+                                          const art_key_chunk_t key[]) {
+    while (!art_is_leaf(node)) {
+        art_inner_node_t *inner = (art_inner_node_t *)node;
+        int order = art_compare_prefix(inner->prefix, 0, key, iterator->depth,
+                                       inner->prefix_size);
+        if (order > 0) {
+            // Everything below sorts after `key`: its first leaf is the
+            // answer.
+            return art_node_init_iterator(node, iterator, true);
+        }
+        if (order < 0) {
+            // Everything below sorts before `key`: the answer is the next
+            // leaf after this subtrie.
+            return art_iterator_up_and_move(iterator, true);
+        }
+
+        // Prefix is equal: pick the lower-bound child for the next chunk.
+        art_key_chunk_t wanted = key[iterator->depth + inner->prefix_size];
+        art_indexed_child_t hit = art_node_lower_bound(node, wanted);
+        if (hit.child == NULL) {
+            // All children carry smaller chunks.
+            return art_iterator_up_and_move(iterator, true);
+        }
+
+        art_iterator_down(iterator, inner, hit.index);
+        if (hit.key_chunk > wanted) {
+            // No child with an equal chunk; the first leaf of the first
+            // larger child is the answer.
+            return art_node_init_iterator(hit.child, iterator, true);
+        }
+        // Equal chunk: keep descending.
+        node = hit.child;
+    }
+
+    art_leaf_t *leaf = CAST_LEAF(node);
+    if (art_compare_keys(leaf->key, key) < 0) {
+        // The path matched but the full key is smaller: take the next leaf.
+        return art_iterator_up_and_move(iterator, true);
+    }
+    // Equal or larger key.
+    return art_iterator_valid_loc(iterator, leaf);
+}
+
+// Builds an iterator positioned at the first (first == true) or last entry of
+// the tree. For an empty tree the returned iterator is all zeroes.
+art_iterator_t art_init_iterator(const art_t *art, bool first) {
+    art_iterator_t iterator;
+    memset(&iterator, 0, sizeof(iterator));
+    if (art->root != NULL) {
+        art_node_init_iterator(art->root, &iterator, first);
+    }
+    return iterator;
+}
+
+// Forward step; see art_iterator_move.
+bool art_iterator_next(art_iterator_t *iterator) {
+    const bool forward = true;
+    return art_iterator_move(iterator, forward);
+}
+
+// Backward step; see art_iterator_move.
+bool art_iterator_prev(art_iterator_t *iterator) {
+    const bool forward = false;
+    return art_iterator_move(iterator, forward);
+}
+
+// Repositions an existing iterator on the first entry whose key is equal to
+// or greater than `key`, reusing the iterator's current position where
+// possible. Returns the resulting iterator status.
+bool art_iterator_lower_bound(art_iterator_t *iterator,
+                              const art_key_chunk_t *key) {
+    if (iterator->value == NULL) {
+        // The iterator is past either end of the ART and holds no valid key:
+        // restart the search from frame 0.
+        iterator->frame = 0;
+        iterator->depth = 0;
+        return art_node_iterator_lower_bound(art_iterator_node(iterator),
+                                             iterator, key);
+    }
+
+    int order = art_compare_prefix(iterator->key, 0, key, 0, ART_KEY_BYTES);
+    // Climb until the path above the iterator matches `key`; from there a
+    // regular lower bound search can take over.
+    while (order != 0) {
+        if (!art_iterator_up(iterator)) {
+            if (order < 0) {
+                // Only smaller keys found.
+                return art_iterator_invalid_loc(iterator);
+            }
+            return art_node_init_iterator(art_iterator_node(iterator),
+                                          iterator, true);
+        }
+        // We only ever move up, so the iterator key still describes the path
+        // to the node we are at.
+        art_inner_node_t *inner =
+            (art_inner_node_t *)art_iterator_node(iterator);
+        order = art_compare_prefix(iterator->key, 0, key, 0,
+                                   iterator->depth + inner->prefix_size);
+    }
+    // The loop above only falls through with order == 0, so this branch
+    // cannot be taken; it is retained unchanged.
+    if (order > 0) {
+        return art_node_init_iterator(art_iterator_node(iterator), iterator,
+                                      true);
+    }
+    return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator,
+                                         key);
+}
+
+// Builds an iterator positioned on the first entry whose key is equal to or
+// greater than `key`. For an empty tree the returned iterator is all zeroes.
+art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) {
+    art_iterator_t iterator;
+    memset(&iterator, 0, sizeof(iterator));
+    if (art->root == NULL) {
+        return iterator;
+    }
+    art_node_iterator_lower_bound(art->root, &iterator, key);
+    return iterator;
+}
+
+// Builds an iterator positioned on the first entry whose key is strictly
+// greater than `key`: a lower bound search, followed by one step forward when
+// that search landed exactly on `key`. For an empty tree the returned iterator
+// is all zeroes.
+art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) {
+    art_iterator_t iterator;
+    memset(&iterator, 0, sizeof(iterator));
+    if (art->root == NULL) {
+        return iterator;
+    }
+    bool found = art_node_iterator_lower_bound(art->root, &iterator, key);
+    if (found && art_compare_keys(iterator.key, key) == 0) {
+        art_iterator_next(&iterator);
+    }
+    return iterator;
+}
+
+// Inserts `val` under `key` and repositions `iterator` via a fresh lower bound
+// search for `key` from the root.
+void art_iterator_insert(art_t *art, art_iterator_t *iterator,
+                         const art_key_chunk_t *key, art_val_t *val) {
+    // TODO: This can likely be faster.
+    art_insert(art, key, val);
+    assert(art->root != NULL);
+
+    // Rewind the iterator to the root before searching.
+    iterator->depth = 0;
+    iterator->frame = 0;
+    art_node_iterator_lower_bound(art->root, iterator, key);
+}
+
+// Erases the entry the iterator points at and repositions the iterator with a
+// lower bound search for the erased key. Returns the erased value, or NULL if
+// the iterator does not point at an entry.
+//
+// TODO: consider keeping `art_t *art` in the iterator.
+art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) {
+    if (iterator->value == NULL) {
+        return NULL;
+    }
+    // Remember the key: it is needed to reposition the iterator at the end.
+    art_key_chunk_t erased_key[ART_KEY_BYTES];
+    memcpy(erased_key, iterator->key, ART_KEY_BYTES);
+    art_val_t *erased = iterator->value;
+
+    if (!art_iterator_up(iterator)) {
+        // The leaf was the root: the tree is empty afterwards.
+        art->root = NULL;
+        art_iterator_invalid_loc(iterator);
+        return erased;
+    }
+
+    // Unlink the leaf from its parent.
+    art_inner_node_t *parent = (art_inner_node_t *)art_iterator_node(iterator);
+    art_key_chunk_t chunk_in_parent =
+        iterator->key[iterator->depth + parent->prefix_size];
+    art_node_t *replacement = art_node_erase(parent, chunk_in_parent);
+
+    if (replacement != (art_node_t *)parent) {
+        // The parent was replaced by a different node (it may be a leaf now),
+        // so whoever pointed at the parent has to be updated.
+        iterator->frames[iterator->frame].node = replacement;
+        if (art_iterator_up(iterator)) {
+            art_inner_node_t *grandparent =
+                (art_inner_node_t *)art_iterator_node(iterator);
+            art_key_chunk_t chunk_in_grandparent =
+                iterator->key[iterator->depth + grandparent->prefix_size];
+            art_replace(grandparent, chunk_in_grandparent, replacement);
+        } else {
+            // The parent was the rootmost node.
+            art->root = replacement;
+        }
+    }
+
+    // Restart from the root and search for the erased key, which finds the
+    // first greater key if it exists. This can likely be mildly faster if we
+    // instead start from the current position.
+    iterator->depth = 0;
+    iterator->frame = 0;
+    art_node_iterator_lower_bound(art->root, iterator, erased_key);
+    return erased;
+}
+
+// Validates the subtrie rooted at `node`. `validator` is passed by value, so
+// the depth / current_key updates made here apply to this subtrie only.
+// Returns true when the subtrie is consistent; otherwise false, with a reason
+// recorded through the validator.
+static bool art_internal_validate_at(const art_node_t *node,
+                                     art_internal_validate_t validator) {
+    if (node == NULL) {
+        return art_validate_fail(&validator, "node is null");
+    }
+
+    if (art_is_leaf(node)) {
+        art_leaf_t *leaf = CAST_LEAF(node);
+        // The leaf's key must start with the path that led to it.
+        int mismatch = art_compare_prefix(leaf->key, 0, validator.current_key,
+                                          0, validator.depth);
+        if (mismatch != 0) {
+            return art_validate_fail(
+                &validator,
+                "leaf key does not match its position's prefix in the tree");
+        }
+        if (validator.validate_cb == NULL ||
+            validator.validate_cb(leaf, validator.reason)) {
+            return true;
+        }
+        // The callback rejected the leaf; supply a reason if it gave none.
+        if (*validator.reason == NULL) {
+            *validator.reason = "leaf validation failed";
+        }
+        return false;
+    }
+
+    art_inner_node_t *inner = (art_inner_node_t *)node;
+    if (validator.depth + inner->prefix_size + 1 > ART_KEY_BYTES) {
+        return art_validate_fail(&validator,
+                                 "node has too much prefix at given depth");
+    }
+    // Extend the tracked path by this node's prefix.
+    memcpy(validator.current_key + validator.depth, inner->prefix,
+           inner->prefix_size);
+    validator.depth += inner->prefix_size;
+
+    bool ok;
+    switch (inner->typecode) {
+        case ART_NODE4_TYPE:
+            ok = art_node4_internal_validate((art_node4_t *)inner, validator);
+            break;
+        case ART_NODE16_TYPE:
+            ok = art_node16_internal_validate((art_node16_t *)inner,
+                                              validator);
+            break;
+        case ART_NODE48_TYPE:
+            ok = art_node48_internal_validate((art_node48_t *)inner,
+                                              validator);
+            break;
+        case ART_NODE256_TYPE:
+            ok = art_node256_internal_validate((art_node256_t *)inner,
+                                               validator);
+            break;
+        default:
+            return art_validate_fail(&validator, "invalid node type");
+    }
+    return ok;
+}
+
+// Validates the whole tree. `reason` may be NULL; when it is not, *reason is
+// reset to NULL up front and is set by the validation routines on failure.
+// `validate_cb`, if non-NULL, is invoked for every leaf. An empty tree is
+// valid.
+bool art_internal_validate(const art_t *art, const char **reason,
+                           art_validate_cb_t validate_cb) {
+    const char *discarded;
+    if (reason == NULL) {
+        // Give the validators something to write to unconditionally.
+        reason = &discarded;
+    }
+    *reason = NULL;
+
+    if (art->root != NULL) {
+        art_internal_validate_t validator = {
+            .reason = reason,
+            .validate_cb = validate_cb,
+            .depth = 0,
+            .current_key = {0},
+        };
+        return art_internal_validate_at(art->root, validator);
+    }
+    return true;
+}
+
+#ifdef __cplusplus
+} // namespace internal
+} // namespace roaring
+} // extern "C"
+#endif
+/* end file src/art/art.c */
+/* begin file src/bitset.c */
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace internal {
+#endif
+
+// `extern inline` declarations for the small bitset helpers.
+// NOTE(review): presumably the inline definitions live in the bitset header
+// and these declarations make this translation unit provide their external
+// definitions -- confirm against the header.
+extern inline void bitset_print(const bitset_t *b);
+extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                                   void *ptr);
+extern inline size_t bitset_next_set_bits(const bitset_t *bitset,
+                                          size_t *buffer, size_t capacity,
+                                          size_t *startfrom);
+extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag);
+extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i);
+extern inline void bitset_set(bitset_t *bitset, size_t i);
+extern inline bool bitset_get(const bitset_t *bitset, size_t i);
+extern inline size_t bitset_size_in_words(const bitset_t *bitset);
+extern inline size_t bitset_size_in_bits(const bitset_t *bitset);
+extern inline size_t bitset_size_in_bytes(const bitset_t *bitset);
+
+/* Allocate an empty bitset (no word array, size and capacity 0).
+ * Returns NULL if the allocation fails. */
+bitset_t *bitset_create(void) {
+    bitset_t *fresh = (bitset_t *)roaring_malloc(sizeof(bitset_t));
+    if (fresh == NULL) {
+        return NULL;
+    }
+    fresh->capacity = 0;
+    fresh->arraysize = 0;
+    fresh->array = NULL;
+    return fresh;
+}
+
+/* Create a new, zeroed bitset able to contain `size` bits. Return NULL in case
+ * of failure. */
+bitset_t *bitset_create_with_capacity(size_t size) {
+    const size_t bits_per_word = sizeof(uint64_t) * 8;
+    bitset_t *bitset = NULL;
+    /* Allocate the bitset itself. */
+    if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) {
+        return NULL;
+    }
+    /* Round up to whole words without computing `size + 63`, which would wrap
+     * around for sizes close to SIZE_MAX and silently yield a tiny bitset. */
+    bitset->arraysize =
+        size / bits_per_word + ((size % bits_per_word) != 0 ? 1 : 0);
+    bitset->capacity = bitset->arraysize;
+    if ((bitset->array = (uint64_t *)roaring_calloc(
+             bitset->arraysize, sizeof(uint64_t))) == NULL) {
+        roaring_free(bitset);
+        return NULL;
+    }
+    return bitset;
+}
+
+/* Create a deep copy of `bitset`. The copy's capacity equals its array size.
+ * Return NULL in case of allocation failure. */
+bitset_t *bitset_copy(const bitset_t *bitset) {
+    bitset_t *copy = NULL;
+    /* Allocate the bitset itself. */
+    if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) {
+        return NULL;
+    }
+    memcpy(copy, bitset, sizeof(bitset_t));
+    copy->capacity = copy->arraysize;
+    if (bitset->arraysize == 0) {
+        /* Nothing to copy. Skip the allocation: a zero-byte malloc may
+         * legitimately return NULL (which would be misreported as a failure)
+         * and the source array may be NULL, which memcpy must not be given.
+         * This leaves the copy in the same state bitset_create() produces. */
+        copy->array = NULL;
+        return copy;
+    }
+    if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) *
+                                                  bitset->arraysize)) == NULL) {
+        roaring_free(copy);
+        return NULL;
+    }
+    memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize);
+    return copy;
+}
+
+/* Set every bit of the bitset to 0; the size is unchanged. */
+void bitset_clear(bitset_t *bitset) {
+    const size_t nbytes = sizeof(uint64_t) * bitset->arraysize;
+    memset(bitset->array, 0, nbytes);
+}
+
+/* Set every bit of the bitset to 1; the size is unchanged. */
+void bitset_fill(bitset_t *bitset) {
+    const size_t nbytes = sizeof(uint64_t) * bitset->arraysize;
+    memset(bitset->array, 0xff, nbytes);
+}
+
+/* Shift the bitset left by `s` bits (towards higher indices), growing it so
+ * that no set bit is lost. If the required memory cannot be obtained the
+ * bitset is left unchanged. */
+void bitset_shift_left(bitset_t *bitset, size_t s) {
+    size_t extra_words = s / 64;
+    int inword_shift = s % 64;
+    size_t as = bitset->arraysize;
+    size_t i;
+    if (inword_shift == 0) {
+        /* Whole-word shift. Bail out if growing failed: the array would be
+         * too small for the writes below. */
+        if (!bitset_resize(bitset, as + extra_words, false)) {
+            return;
+        }
+        // could be done with a memmove
+        for (i = as + extra_words; i > extra_words; i--) {
+            bitset->array[i - 1] = bitset->array[i - 1 - extra_words];
+        }
+    } else {
+        /* One additional word receives the bits shifted out of the top. */
+        if (!bitset_resize(bitset, as + extra_words + 1, true)) {
+            return;
+        }
+        /* With no original words there is nothing to move; the resize above
+         * already zero-filled the array, and array[as - 1] must not be read. */
+        if (as > 0) {
+            bitset->array[as + extra_words] =
+                bitset->array[as - 1] >> (64 - inword_shift);
+            for (i = as + extra_words; i >= extra_words + 2; i--) {
+                bitset->array[i - 1] =
+                    (bitset->array[i - 1 - extra_words] << inword_shift) |
+                    (bitset->array[i - 2 - extra_words] >>
+                     (64 - inword_shift));
+            }
+            bitset->array[extra_words] = bitset->array[0] << inword_shift;
+        }
+    }
+    /* The vacated low words become zero. */
+    for (i = 0; i < extra_words; i++) {
+        bitset->array[i] = 0;
+    }
+}
+
+/* Shift the bitset right by `s` bits (towards lower indices), dropping the
+ * bits shifted out at the bottom and shrinking the bitset by the number of
+ * whole words shifted. */
+void bitset_shift_right(bitset_t *bitset, size_t s) {
+    size_t extra_words = s / 64;
+    int inword_shift = s % 64;
+    size_t as = bitset->arraysize;
+    size_t i;
+    if (extra_words >= as) {
+        /* Every word is shifted out (this also covers an empty bitset).
+         * Without this guard `as - extra_words` wraps around and the code
+         * below would access memory outside the array. Shrinking never
+         * reallocates, so the resize cannot fail. */
+        bitset_resize(bitset, 0, false);
+        return;
+    }
+    if (inword_shift == 0) {
+        // could be done with a memmove
+        for (i = 0; i < as - extra_words; i++) {
+            bitset->array[i] = bitset->array[i + extra_words];
+        }
+        bitset_resize(bitset, as - extra_words, false);
+
+    } else {
+        for (i = 0; i + extra_words + 1 < as; i++) {
+            bitset->array[i] =
+                (bitset->array[i + extra_words] >> inword_shift) |
+                (bitset->array[i + extra_words + 1] << (64 - inword_shift));
+        }
+        /* The top word has no higher neighbor to pull bits from. */
+        bitset->array[as - extra_words - 1] =
+            (bitset->array[as - 1] >> inword_shift);
+        bitset_resize(bitset, as - extra_words, false);
+    }
+}
+
+/* Release the word array and the bitset itself. NULL is accepted and
+ * ignored. */
+void bitset_free(bitset_t *bitset) {
+    if (bitset != NULL) {
+        roaring_free(bitset->array);
+        roaring_free(bitset);
+    }
+}
+
+/* Resize the bitset to `newarraysize` words (newarraysize * 64 bits).
+ * The capacity grows by doubling when needed and is never reduced. Words
+ * gained by growing are zeroed only when `padwithzeroes` is true.
+ * Returns true on success; false (bitset unchanged) if the size is too large
+ * or the reallocation fails. */
+bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) {
+    if (newarraysize > SIZE_MAX / 64) {
+        return false;
+    }
+    /* Number of words that exist both before and after the resize. */
+    const size_t kept = (bitset->arraysize < newarraysize) ? bitset->arraysize
+                                                           : newarraysize;
+    if (newarraysize > bitset->capacity) {
+        size_t grown = (bitset->capacity == 0) ? 1 : bitset->capacity;
+        while (grown < newarraysize) {
+            grown *= 2;
+        }
+        uint64_t *moved = (uint64_t *)roaring_realloc(
+            bitset->array, sizeof(uint64_t) * grown);
+        if (moved == NULL) {
+            return false;
+        }
+        bitset->array = moved;
+        bitset->capacity = grown;
+    }
+    if (padwithzeroes && kept < newarraysize) {
+        memset(bitset->array + kept, 0,
+               sizeof(uint64_t) * (newarraysize - kept));
+    }
+    bitset->arraysize = newarraysize;
+    return true;  // success!
+}
+
+/* Count the set bits of the bitset. The words are summed in unrolled groups
+ * of 8, then 4, then one at a time. */
+size_t bitset_count(const bitset_t *bitset) {
+    const uint64_t *words = bitset->array;
+    const size_t nwords = bitset->arraysize;
+    size_t bits = 0;
+    size_t pos = 0;
+    while (pos + 7 < nwords) {
+        bits += roaring_hamming(words[pos]);
+        bits += roaring_hamming(words[pos + 1]);
+        bits += roaring_hamming(words[pos + 2]);
+        bits += roaring_hamming(words[pos + 3]);
+        bits += roaring_hamming(words[pos + 4]);
+        bits += roaring_hamming(words[pos + 5]);
+        bits += roaring_hamming(words[pos + 6]);
+        bits += roaring_hamming(words[pos + 7]);
+        pos += 8;
+    }
+    while (pos + 3 < nwords) {
+        bits += roaring_hamming(words[pos]);
+        bits += roaring_hamming(words[pos + 1]);
+        bits += roaring_hamming(words[pos + 2]);
+        bits += roaring_hamming(words[pos + 3]);
+        pos += 4;
+    }
+    while (pos < nwords) {
+        bits += roaring_hamming(words[pos]);
+        pos++;
+    }
+    return bits;
+}
+
+/* b1 |= b2. When b2 has more words than b1, b1 is resized to b2's size and the
+ * extra words of b2 are copied over. Returns false only when that resize
+ * fails; the words shared by both bitsets have already been OR-ed by then. */
+bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t len1 = b1->arraysize;
+    const size_t shared = (b2->arraysize < len1) ? b2->arraysize : len1;
+    size_t idx = 0;
+    while (idx < shared) {
+        b1->array[idx] |= b2->array[idx];
+        ++idx;
+    }
+    if (b2->arraysize <= len1) {
+        return true;
+    }
+    // b2 is longer: extend b1 and append the tail of b2
+    if (!bitset_resize(b1, b2->arraysize, false)) {
+        return false;
+    }
+    memcpy(b1->array + len1, b2->array + len1,
+           (b2->arraysize - len1) * sizeof(uint64_t));
+    return true;
+}
+
+/* Index of the lowest set bit. Returns 0 when no bit is set, which cannot be
+ * told apart from "bit 0 is set" by the return value alone. */
+size_t bitset_minimum(const bitset_t *bitset) {
+    size_t idx = 0;
+    while (idx < bitset->arraysize) {
+        const uint64_t word = bitset->array[idx];
+        if (word != 0) {
+            return roaring_trailing_zeroes(word) + idx * 64;
+        }
+        idx++;
+    }
+    return 0;
+}
+
+/* Enlarge the bitset to newarraysize 64-bit words, zero-filling the words that
+ * are added. Returns false, leaving the bitset untouched, when newarraysize is
+ * smaller than the current size, is too large, or the reallocation fails. */
+bool bitset_grow(bitset_t *bitset, size_t newarraysize) {
+    if ((newarraysize < bitset->arraysize) ||
+        (newarraysize > SIZE_MAX / 64)) {
+        return false;
+    }
+    if (newarraysize > bitset->capacity) {
+        // all-ones mask as wide as newarraysize, plus one: a power of two
+        // strictly greater than newarraysize
+        size_t target = (UINT64_C(0xFFFFFFFFFFFFFFFF) >>
+                         roaring_leading_zeroes(newarraysize)) +
+                        1;
+        while (target < newarraysize) {
+            target *= 2;
+        }
+        uint64_t *moved = (uint64_t *)roaring_realloc(
+            bitset->array, sizeof(uint64_t) * target);
+        if (moved == NULL) {
+            return false;
+        }
+        bitset->array = moved;
+        bitset->capacity = target;
+    }
+    memset(bitset->array + bitset->arraysize, 0,
+           sizeof(uint64_t) * (newarraysize - bitset->arraysize));
+    bitset->arraysize = newarraysize;
+    return true;
+}
+
+/* Index of the highest set bit, scanning from the last word down. Returns 0
+ * when no bit is set. */
+size_t bitset_maximum(const bitset_t *bitset) {
+    size_t remaining = bitset->arraysize;
+    while (remaining > 0) {
+        const uint64_t word = bitset->array[remaining - 1];
+        if (word != 0) {
+            return 63 - roaring_leading_zeroes(word) + (remaining - 1) * 64;
+        }
+        remaining--;
+    }
+    return 0;
+}
+
+/* Disjointness test: true when no bit is set in both b1 and b2.
+ *
+ * Only the words present in both bitsets are compared, and the scan stops at
+ * the first word holding a common bit. */
+bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1,
+                      const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t idx = 0;
+    while (idx < shared) {
+        if ((b1->array[idx] & b2->array[idx]) != 0) {
+            return false;
+        }
+        idx++;
+    }
+    return true;
+}
+
+/* Intersection test: true when b1 and b2 have at least one set bit in common,
+ * false when they are disjoint.
+ *
+ * Only the words present in both bitsets are compared, and the scan stops at
+ * the first word holding a common bit. */
+bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1,
+                       const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t idx = 0;
+    while (idx < shared) {
+        if ((b1->array[idx] & b2->array[idx]) != 0) {
+            return true;
+        }
+        idx++;
+    }
+    return false;
+}
+
+/* True when some word of b at index starting_loc or later is non-zero. A
+ * starting_loc at or past the end of the array yields false. */
+static bool any_bits_set(const bitset_t *b, size_t starting_loc) {
+    size_t idx = starting_loc;
+    // the loop condition also covers starting_loc >= b->arraysize
+    while (idx < b->arraysize) {
+        if (b->array[idx] != 0) {
+            return true;
+        }
+        idx++;
+    }
+    return false;
+}
+
+/* Superset test: true when every bit set in b2 is also set in b1.
+ *
+ * Stops at the first word of b2 holding a bit that b1 lacks. When b2 has more
+ * words than b1, those extra words must all be zero. */
+bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1,
+                         const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        // bits of b2 that are absent from b1
+        if ((b2->array[idx] & ~b1->array[idx]) != 0) {
+            return false;
+        }
+    }
+    if (b2->arraysize <= b1->arraysize) {
+        return true;
+    }
+    /* b2 extends past b1: it may not have any bit set out there */
+    return !any_bits_set(b2, b1->arraysize);
+}
+
+/* Number of bits set in (b1 | b2), computed without modifying either input.
+ * Words present in both bitsets are OR-ed before counting; the words that only
+ * the longer bitset has are counted as they are. Loops are unrolled by four. */
+size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2) {
+    // the bitset supplying the words past the common prefix
+    const bitset_t *longer = (b2->arraysize > b1->arraysize) ? b2 : b1;
+    const size_t shared =
+        (b1->arraysize < b2->arraysize) ? b1->arraysize : b2->arraysize;
+    size_t total = 0;
+    size_t pos = 0;
+    while (pos + 3 < shared) {
+        total += roaring_hamming(b1->array[pos] | b2->array[pos]);
+        total += roaring_hamming(b1->array[pos + 1] | b2->array[pos + 1]);
+        total += roaring_hamming(b1->array[pos + 2] | b2->array[pos + 2]);
+        total += roaring_hamming(b1->array[pos + 3] | b2->array[pos + 3]);
+        pos += 4;
+    }
+    while (pos < shared) {
+        total += roaring_hamming(b1->array[pos] | b2->array[pos]);
+        ++pos;
+    }
+    // pos now equals the shorter size; only `longer` has words from here on
+    while (pos + 3 < longer->arraysize) {
+        total += roaring_hamming(longer->array[pos]);
+        total += roaring_hamming(longer->array[pos + 1]);
+        total += roaring_hamming(longer->array[pos + 2]);
+        total += roaring_hamming(longer->array[pos + 3]);
+        pos += 4;
+    }
+    while (pos < longer->arraysize) {
+        total += roaring_hamming(longer->array[pos]);
+        ++pos;
+    }
+    return total;
+}
+
+/* b1 &= b2. Words of b1 beyond the end of b2 are cleared, since b2 has no bits
+ * there. The size of b1 is not changed. */
+void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        b1->array[idx] &= b2->array[idx];
+    }
+    // tail of b1 with no counterpart in b2
+    for (idx = shared; idx < b1->arraysize; idx++) {
+        b1->array[idx] = 0;
+    }
+}
+
+/* Number of bits set in (b1 & b2), computed without modifying either input.
+ * Only the words present in both bitsets can contribute. */
+size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t total = 0;
+    size_t idx = 0;
+    while (idx < shared) {
+        total += roaring_hamming(b1->array[idx] & b2->array[idx]);
+        ++idx;
+    }
+    return total;
+}
+
+/* b1 &= ~b2: clear in b1 every bit that is set in b2. Words of b1 beyond the
+ * end of b2 are left untouched. */
+void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        b1->array[idx] &= ~(b2->array[idx]);
+    }
+}
+
+/* Number of bits set in (b1 & ~b2), computed without modifying either input.
+ * Words of b1 beyond the end of b2 are counted in full. */
+size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t shared =
+        (b2->arraysize < b1->arraysize) ? b2->arraysize : b1->arraysize;
+    size_t total = 0;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        total += roaring_hamming(b1->array[idx] & ~(b2->array[idx]));
+    }
+    // part of b1 that b2 does not cover
+    for (idx = shared; idx < b1->arraysize; idx++) {
+        total += roaring_hamming(b1->array[idx]);
+    }
+    return total;
+}
+
+/* b1 ^= b2. When b2 has more words than b1, b1 is resized to b2's size and the
+ * extra words of b2 are copied over. Returns false only when that resize
+ * fails; the words shared by both bitsets have already been XOR-ed by then. */
+bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t len1 = b1->arraysize;
+    const size_t shared = (b2->arraysize < len1) ? b2->arraysize : len1;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        b1->array[idx] ^= b2->array[idx];
+    }
+    if (b2->arraysize <= len1) {
+        return true;
+    }
+    // b2 is longer: extend b1 and append the tail of b2
+    if (!bitset_resize(b1, b2->arraysize, false)) {
+        return false;
+    }
+    memcpy(b1->array + len1, b2->array + len1,
+           (b2->arraysize - len1) * sizeof(uint64_t));
+    return true;
+}
+
+/* Number of bits set in (b1 ^ b2), computed without modifying either input.
+ * Words that only the longer bitset has are counted as they are. */
+size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2) {
+    // the bitset supplying the words past the common prefix
+    const bitset_t *longer = (b2->arraysize > b1->arraysize) ? b2 : b1;
+    const size_t shared =
+        (b1->arraysize < b2->arraysize) ? b1->arraysize : b2->arraysize;
+    size_t total = 0;
+    size_t idx;
+    for (idx = 0; idx < shared; idx++) {
+        total += roaring_hamming(b1->array[idx] ^ b2->array[idx]);
+    }
+    for (idx = shared; idx < longer->arraysize; idx++) {
+        total += roaring_hamming(longer->array[idx]);
+    }
+    return total;
+}
+
+/* Drop trailing all-zero words and shrink the allocation to exactly the words
+ * that remain. Returns true on success (including when there is nothing to
+ * do), false when the shrinking reallocation fails, in which case the bitset
+ * is left unchanged.
+ *
+ * A bitset with no set bit ends up with array == NULL and capacity ==
+ * arraysize == 0. */
+bool bitset_trim(bitset_t *bitset) {
+    size_t newsize = bitset->arraysize;
+    while ((newsize > 0) && (bitset->array[newsize - 1] == 0)) {
+        newsize -= 1;
+    }
+    if (bitset->capacity == newsize) return true;  // nothing to do
+    if (newsize == 0) {
+        // Do not rely on realloc(ptr, 0): its behaviour is
+        // implementation-defined, and an implementation that frees the block
+        // and returns NULL would make us report failure while leaving
+        // bitset->array dangling. Free explicitly instead; bitset_resize and
+        // bitset_grow both restart from capacity 0 through a fresh
+        // reallocation.
+        roaring_free(bitset->array);
+        bitset->array = NULL;
+        bitset->capacity = 0;
+        bitset->arraysize = 0;
+        return true;
+    }
+    uint64_t *newarray = (uint64_t *)roaring_realloc(
+        bitset->array, sizeof(uint64_t) * newsize);
+    if (newarray == NULL) {
+        return false;
+    }
+    bitset->array = newarray;
+    bitset->capacity = newsize;
+    bitset->arraysize = newsize;
+    return true;
+}
+
+#ifdef __cplusplus
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
-/* end file src/array_util.c */
+/* end file src/bitset.c */
/* begin file src/bitset_util.c */
#include <assert.h>
#include <stdint.h>
@@ -9035,11 +11529,24 @@ bool memequals(const void *s1, const void *s2, size_t n) {
#include <string.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+using namespace ::roaring::internal;
+extern "C" {
+namespace roaring {
+namespace api {
#endif
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
static uint8_t lengthTable[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -9054,7 +11561,7 @@ static uint8_t lengthTable[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
#endif
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
ALIGNED(32)
static uint32_t vecDecodeTable[256][8] = {
{0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
@@ -9315,9 +11822,9 @@ static uint32_t vecDecodeTable[256][8] = {
{1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
};
-#endif // #ifdef CROARING_IS_X64
+#endif // #if CROARING_IS_X64
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
// same as vecDecodeTable but in 16 bits
ALIGNED(32)
static uint16_t vecDecodeTable_uint16[256][8] = {
@@ -9581,7 +12088,126 @@ static uint16_t vecDecodeTable_uint16[256][8] = {
#endif
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+// Identity byte table: entry k holds the value k, for k in 0..63. The AVX-512
+// extraction routines below load it as a 512-bit vector of bit positions and
+// compress it under a 64-bit word used as mask.
+CROARING_TARGET_AVX512
+const uint8_t vbmi2_table[64] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+/*
+ * Write the positions of the set bits of words[0..length) to vout as 32-bit
+ * integers, each offset by base, and return how many values were written.
+ * At most outcapacity values are written; extraction stops early when the
+ * output is full.
+ *
+ * The vector loop handles one 64-bit word per iteration and always stores 64
+ * output slots, whatever the number of set bits, so it only runs while more
+ * than 64 slots remain. A scalar loop finishes the remaining words.
+ */
+size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length,
+ uint32_t *vout, size_t outcapacity,
+ uint32_t base) {
+ uint32_t *out = (uint32_t *)vout;
+ uint32_t *initout = out;
+ uint32_t *safeout = out + outcapacity;  // one past the last writable slot
+ __m512i base_v = _mm512_set1_epi32(base);
+ __m512i index_table = _mm512_loadu_si512(vbmi2_table);
+ size_t i = 0;
+
+ // vector path: requires room for the full 64-slot store done below
+ for (; (i < length) && ((out + 64) < safeout); i += 1) {
+ uint64_t v = words[i];
+ // keep the bytes of the identity table selected by the bits of v, packed
+ // toward the low end and zero-filled: the in-word positions of set bits
+ __m512i vec = _mm512_maskz_compress_epi8(v, index_table);
+
+ // number of valid entries in vec (at most 64, so it fits in a uint8_t)
+ uint8_t advance = (uint8_t)roaring_hamming(v);
+
+ // value to add to every in-word position: base + 64 * word index
+ __m512i vbase =
+ _mm512_add_epi32(base_v, _mm512_set1_epi32((int)(i * 64)));
+ // widen each 16-byte quarter of vec to sixteen 32-bit lanes
+ __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 0));
+ __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 1));
+ __m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 2));
+ __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 3));
+
+ r1 = _mm512_add_epi32(r1, vbase);
+ r2 = _mm512_add_epi32(r2, vbase);
+ r3 = _mm512_add_epi32(r3, vbase);
+ r4 = _mm512_add_epi32(r4, vbase);
+ // all 64 slots are stored; only the first `advance` are meaningful and the
+ // rest get overwritten by later iterations or lie past the returned count
+ _mm512_storeu_si512((__m512i *)out, r1);
+ _mm512_storeu_si512((__m512i *)(out + 16), r2);
+ _mm512_storeu_si512((__m512i *)(out + 32), r3);
+ _mm512_storeu_si512((__m512i *)(out + 48), r4);
+
+ out += advance;
+ }
+
+ // account for the words consumed by the vector loop
+ base += i * 64;
+
+ // scalar tail: one set bit at a time, bounded by the output capacity
+ for (; (i < length) && (out < safeout); ++i) {
+ uint64_t w = words[i];
+ while ((w != 0) && (out < safeout)) {
+ uint64_t t =
+ w & (~w + 1); // on x64, should compile to BLSI (careful: the
+ // Intel compiler seems to fail)
+ int r =
+ roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
+ uint32_t val = r + base;
+ memcpy(out, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ out++;
+ w ^= t;  // clear the lowest set bit
+ }
+ base += 64;
+ }
+
+ return out - initout;
+}
+
+// Reference:
+// https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/
+//
+// 16-bit variant of bitset_extract_setbits_avx512: writes the positions of the
+// set bits of array[0..length) to vout as uint16_t values, each offset by base,
+// and returns how many values were written (at most capacity). Arithmetic on
+// the positions is done in 16 bits.
+//
+// The vector loop always stores 64 output slots per word, so it only runs
+// while more than 64 slots remain; a scalar loop finishes the rest.
+size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array,
+ size_t length, uint16_t *vout,
+ size_t capacity, uint16_t base) {
+ uint16_t *out = (uint16_t *)vout;
+ uint16_t *initout = out;
+ uint16_t *safeout = vout + capacity;  // one past the last writable slot
+
+ __m512i base_v = _mm512_set1_epi16(base);
+ __m512i index_table = _mm512_loadu_si512(vbmi2_table);
+ size_t i = 0;
+
+ // vector path: requires room for the full 64-slot store done below
+ for (; (i < length) && ((out + 64) < safeout); i++) {
+ uint64_t v = array[i];
+ // in-word positions of the set bits of v, packed toward the low end
+ __m512i vec = _mm512_maskz_compress_epi8(v, index_table);
+
+ // number of valid entries in vec
+ uint8_t advance = (uint8_t)roaring_hamming(v);
+
+ // value to add to every in-word position: base + 64 * word index
+ __m512i vbase =
+ _mm512_add_epi16(base_v, _mm512_set1_epi16((short)(i * 64)));
+ // widen each 32-byte half of vec to thirty-two 16-bit lanes
+ __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 0));
+ __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 1));
+
+ r1 = _mm512_add_epi16(r1, vbase);
+ r2 = _mm512_add_epi16(r2, vbase);
+
+ // all 64 slots are stored; only the first `advance` are meaningful
+ _mm512_storeu_si512((__m512i *)out, r1);
+ _mm512_storeu_si512((__m512i *)(out + 32), r2);
+ out += advance;
+ }
+
+ // account for the words consumed by the vector loop
+ base += i * 64;
+
+ // scalar tail: one set bit at a time, bounded by the output capacity
+ for (; (i < length) && (out < safeout); ++i) {
+ uint64_t w = array[i];
+ while ((w != 0) && (out < safeout)) {
+ uint64_t t =
+ w & (~w + 1); // on x64, should compile to BLSI (careful: the
+ // Intel compiler seems to fail)
+ int r =
+ roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
+ uint32_t val = r + base;
+ // copies the first two bytes of the 32-bit val, i.e. its low 16 bits on
+ // a little-endian target such as x64
+ memcpy(out, &val, sizeof(uint16_t));
+ out++;
+ w ^= t;  // clear the lowest set bit
+ }
+ base += 64;
+ }
+
+ return out - initout;
+}
+CROARING_UNTARGET_AVX512
+#endif
+
CROARING_TARGET_AVX2
size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
uint32_t *out, size_t outcapacity,
@@ -9597,7 +12223,8 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
if (w == 0) {
baseVec = _mm256_add_epi32(baseVec, incVec);
} else {
- for (int k = 0; k < 4; ++k) {
+ int k;
+ for (k = 0; k < 4; ++k) {
uint8_t byteA = (uint8_t)w;
uint8_t byteB = (uint8_t)(w >> 8);
w >>= 16;
@@ -9622,8 +12249,11 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
for (; (i < length) && (out < safeout); ++i) {
uint64_t w = words[i];
while ((w != 0) && (out < safeout)) {
- uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
- int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+ uint64_t t =
+ w & (~w + 1); // on x64, should compile to BLSI (careful: the
+ // Intel compiler seems to fail)
+ int r =
+ roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
@@ -9634,18 +12264,20 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
}
return out - initout;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#endif // CROARING_IS_X64
size_t bitset_extract_setbits(const uint64_t *words, size_t length,
uint32_t *out, uint32_t base) {
int outpos = 0;
- size_t i;
- for (i = 0; i < length; ++i) {
+ size_t i; for(i = 0; i < length; ++i) {
uint64_t w = words[i];
while (w != 0) {
- uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
- int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+ uint64_t t =
+ w & (~w + 1); // on x64, should compile to BLSI (careful: the
+ // Intel compiler seems to fail)
+ int r =
+ roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
@@ -9657,19 +12289,16 @@ size_t bitset_extract_setbits(const uint64_t *words, size_t length,
return outpos;
}
-size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
- const uint64_t * __restrict__ words2,
- size_t length, uint16_t *out,
- uint16_t base) {
+size_t bitset_extract_intersection_setbits_uint16(
+ const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2,
+ size_t length, uint16_t *out, uint16_t base) {
int outpos = 0;
- size_t i;
-
- for (i = 0; i < length; ++i) {
+ size_t i; for(i = 0; i < length; ++i) {
uint64_t w = words1[i] & words2[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- out[outpos++] = r + base;
+ int r = roaring_trailing_zeroes(w);
+ out[outpos++] = (uint16_t)(r + base);
w ^= t;
}
base += 64;
@@ -9677,7 +12306,7 @@ size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__
return outpos;
}
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
/*
* Given a bitset containing "length" 64-bit words, write out the position
* of all the set bits to "out" as 16-bit integers, values start at "base" (can
@@ -9706,7 +12335,8 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
if (w == 0) {
baseVec = _mm_add_epi16(baseVec, incVec);
} else {
- for (int k = 0; k < 4; ++k) {
+ int k;
+ for (k = 0; k < 4; ++k) {
uint8_t byteA = (uint8_t)w;
uint8_t byteB = (uint8_t)(w >> 8);
w >>= 16;
@@ -9732,8 +12362,8 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
uint64_t w = words[i];
while ((w != 0) && (out < safeout)) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- *out = r + base;
+ int r = roaring_trailing_zeroes(w);
+ *out = (uint16_t)(r + base);
out++;
w ^= t;
}
@@ -9741,7 +12371,7 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
}
return out - initout;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#endif
/*
@@ -9756,13 +12386,12 @@ CROARING_UNTARGET_REGION
size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
uint16_t *out, uint16_t base) {
int outpos = 0;
- size_t i;
- for (i = 0; i < length; ++i) {
+ size_t i; for(i = 0; i < length; ++i) {
uint64_t w = words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
- out[outpos++] = r + base;
+ int r = roaring_trailing_zeroes(w);
+ out[outpos++] = (uint16_t)(r + base);
w ^= t;
}
base += 64;
@@ -9772,8 +12401,10 @@ size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64)
-static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
+static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words,
+ uint64_t card,
+ const uint16_t *list,
+ uint64_t length) {
uint64_t offset, load, pos;
uint64_t shift = 6;
const uint16_t *end = list + length;
@@ -9797,7 +12428,8 @@ static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t c
return card;
}
-static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list,
+ uint64_t length) {
uint64_t pos;
const uint16_t *end = list + length;
@@ -9852,8 +12484,9 @@ static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, u
}
}
-static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
+static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card,
+ const uint16_t *list,
+ uint64_t length) {
uint64_t offset, load, pos;
uint64_t shift = 6;
const uint16_t *end = list + length;
@@ -9878,8 +12511,9 @@ static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, co
return card;
}
-static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
- uint64_t length) {
+static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card,
+ const uint16_t *list,
+ uint64_t length) {
uint64_t offset, load, newload, pos, index;
const uint16_t *end = list + length;
while (list != end) {
@@ -9895,8 +12529,10 @@ static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card,
return card;
}
-static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card,
- const uint16_t *list, uint64_t length) {
+static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words,
+ uint64_t card,
+ const uint16_t *list,
+ uint64_t length) {
uint64_t offset, load, newload, pos, index;
const uint16_t *end = list + length;
while (list != end) {
@@ -9912,7 +12548,9 @@ static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_
return card;
}
-static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+static inline void _scalar_bitset_set_list(uint64_t *words,
+ const uint16_t *list,
+ uint64_t length) {
uint64_t offset, load, newload, pos, index;
const uint16_t *end = list + length;
while (list != end) {
@@ -9928,7 +12566,7 @@ static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list
uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
uint64_t length) {
- if( croaring_avx2() ) {
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
return _asm_bitset_clear_list(words, card, list, length);
} else {
return _scalar_bitset_clear_list(words, card, list, length);
@@ -9937,7 +12575,7 @@ uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
const uint16_t *list, uint64_t length) {
- if( croaring_avx2() ) {
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
return _asm_bitset_set_list_withcard(words, card, list, length);
} else {
return _scalar_bitset_set_list_withcard(words, card, list, length);
@@ -9945,7 +12583,7 @@ uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
}
void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
- if( croaring_avx2() ) {
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
_asm_bitset_set_list(words, list, length);
} else {
_scalar_bitset_set_list(words, list, length);
@@ -10039,9 +12677,13 @@ void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace api {
#endif
-/* end file src/bitset_util.c */
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif/* end file src/bitset_util.c */
/* begin file src/containers/array.c */
/*
* array.c
@@ -10052,20 +12694,38 @@ void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
#include <stdio.h>
#include <stdlib.h>
+
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
extern inline uint16_t array_container_minimum(const array_container_t *arr);
extern inline uint16_t array_container_maximum(const array_container_t *arr);
-extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x);
+extern inline int array_container_index_equalorlarger(
+ const array_container_t *arr, uint16_t x);
extern inline int array_container_rank(const array_container_t *arr,
uint16_t x);
+extern inline uint32_t array_container_rank_many(const array_container_t *arr,
+ uint64_t start_rank,
+ const uint32_t *begin,
+ const uint32_t *end,
+ uint64_t *ans);
+extern inline int array_container_get_index(const array_container_t *arr,
+ uint16_t x);
extern inline bool array_container_contains(const array_container_t *arr,
uint16_t pos);
extern inline int array_container_cardinality(const array_container_t *array);
-extern inline bool array_container_nonzero_cardinality(const array_container_t *array);
+extern inline bool array_container_nonzero_cardinality(
+ const array_container_t *array);
extern inline int32_t array_container_serialized_size_in_bytes(int32_t card);
extern inline bool array_container_empty(const array_container_t *array);
extern inline bool array_container_full(const array_container_t *array);
@@ -10074,15 +12734,15 @@ extern inline bool array_container_full(const array_container_t *array);
array_container_t *array_container_create_given_capacity(int32_t size) {
array_container_t *container;
- if ((container = (array_container_t *)roaring_malloc(sizeof(array_container_t))) ==
- NULL) {
+ if ((container = (array_container_t *)roaring_malloc(
+ sizeof(array_container_t))) == NULL) {
return NULL;
}
- if( size <= 0 ) { // we don't want to rely on malloc(0)
+ if (size <= 0) { // we don't want to rely on malloc(0)
container->array = NULL;
- } else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) * size)) ==
- NULL) {
+ } else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) *
+ size)) == NULL) {
roaring_free(container);
return NULL;
}
@@ -10094,23 +12754,24 @@ array_container_t *array_container_create_given_capacity(int32_t size) {
}
/* Create a new array. Return NULL in case of failure. */
-array_container_t *array_container_create() {
+array_container_t *array_container_create(void) {
return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
}
/* Create a new array containing all values in [min,max). */
-array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
- array_container_t * answer = array_container_create_given_capacity(max - min + 1);
- uint32_t k;
- if(answer == NULL) return answer;
+array_container_t *array_container_create_range(uint32_t min, uint32_t max) {
+ array_container_t *answer =
+ array_container_create_given_capacity(max - min + 1);
+ if (answer == NULL) return answer;
answer->cardinality = 0;
- for(k = min; k < max; k++) {
- answer->array[answer->cardinality++] = k;
+ uint32_t k; for(k = min; k < max; k++) {
+ answer->array[answer->cardinality++] = k;
}
return answer;
}
/* Duplicate container */
+ALLOW_UNALIGNED
array_container_t *array_container_clone(const array_container_t *src) {
array_container_t *newcontainer =
array_container_create_given_capacity(src->capacity);
@@ -10124,9 +12785,8 @@ array_container_t *array_container_clone(const array_container_t *src) {
return newcontainer;
}
-void array_container_offset(const array_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset) {
+void array_container_offset(const array_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset) {
array_container_t *lo = NULL, *hi = NULL;
int top, lo_cap, hi_cap;
@@ -10134,22 +12794,20 @@ void array_container_offset(const array_container_t *c,
lo_cap = count_less(c->array, c->cardinality, top);
if (loc && lo_cap) {
- int i;
lo = array_container_create_given_capacity(lo_cap);
- for (i = 0; i < lo_cap; ++i) {
+ int i; for(i = 0; i < lo_cap; ++i) {
array_container_add(lo, c->array[i] + offset);
}
- *loc = (container_t*)lo;
+ *loc = (container_t *)lo;
}
hi_cap = c->cardinality - lo_cap;
if (hic && hi_cap) {
- int i;
hi = array_container_create_given_capacity(hi_cap);
- for (i = lo_cap; i < c->cardinality; ++i) {
+ int i; for(i = lo_cap; i < c->cardinality; ++i) {
array_container_add(hi, c->array[i] + offset);
}
- *hic = (container_t*)hi;
+ *hic = (container_t *)hi;
}
}
@@ -10157,32 +12815,34 @@ int array_container_shrink_to_fit(array_container_t *src) {
if (src->cardinality == src->capacity) return 0; // nothing to do
int savings = src->capacity - src->cardinality;
src->capacity = src->cardinality;
- if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs
- roaring_free(src->array);
- src->array = NULL;
+ if (src->capacity ==
+ 0) { // we do not want to rely on realloc for zero allocs
+ roaring_free(src->array);
+ src->array = NULL;
} else {
- uint16_t *oldarray = src->array;
- src->array =
- (uint16_t *)roaring_realloc(oldarray, src->capacity * sizeof(uint16_t));
- if (src->array == NULL) roaring_free(oldarray); // should never happen?
+ uint16_t *oldarray = src->array;
+ src->array = (uint16_t *)roaring_realloc(
+ oldarray, src->capacity * sizeof(uint16_t));
+ if (src->array == NULL) roaring_free(oldarray); // should never happen?
}
return savings;
}
/* Free memory. */
void array_container_free(array_container_t *arr) {
- if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
+ if (arr->array !=
+ NULL) { // Jon Strabala reports that some tools complain otherwise
roaring_free(arr->array);
- arr->array = NULL; // pedantic
+ arr->array = NULL; // pedantic
}
roaring_free(arr);
}
static inline int32_t grow_capacity(int32_t capacity) {
- return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
- : capacity < 64 ? capacity * 2
- : capacity < 1024 ? capacity * 3 / 2
- : capacity * 5 / 4;
+ return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
+ : capacity < 64 ? capacity * 2
+ : capacity < 1024 ? capacity * 3 / 2
+ : capacity * 5 / 4;
}
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
@@ -10191,7 +12851,6 @@ static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
void array_container_grow(array_container_t *container, int32_t min,
bool preserve) {
-
int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);
@@ -10205,16 +12864,13 @@ void array_container_grow(array_container_t *container, int32_t min,
} else {
// Jon Strabala reports that some tools complain otherwise
if (array != NULL) {
- roaring_free(array);
+ roaring_free(array);
}
- container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
+ container->array =
+ (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
}
- // handle the case where realloc fails
- if (container->array == NULL) {
- fprintf(stderr, "could not allocate memory\n");
- }
- assert(container->array != NULL);
+ // if realloc fails, we have container->array == NULL.
}
/* Copy one container into another. We assume that they are distinct. */
@@ -10231,8 +12887,7 @@ void array_container_copy(const array_container_t *src,
void array_container_add_from_range(array_container_t *arr, uint32_t min,
uint32_t max, uint16_t step) {
- uint32_t value;
- for (value = min; value < max; value += step) {
+ uint32_t value; for(value = min; value < max; value += step) {
array_container_append(arr, value);
}
}
@@ -10247,11 +12902,10 @@ void array_container_union(const array_container_t *array_1,
const int32_t max_cardinality = card_1 + card_2;
if (out->capacity < max_cardinality) {
- array_container_grow(out, max_cardinality, false);
+ array_container_grow(out, max_cardinality, false);
}
- out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1,
- array_2->array, card_2, out->array);
-
+ out->cardinality = (int32_t)fast_union_uint16(
+ array_1->array, card_1, array_2->array, card_2, out->array);
}
/* Computes the difference of array1 and array2 and write the result
@@ -10263,16 +12917,17 @@ void array_container_andnot(const array_container_t *array_1,
array_container_t *out) {
if (out->capacity < array_1->cardinality)
array_container_grow(out, array_1->cardinality, false);
-#ifdef CROARING_IS_X64
- if(( croaring_avx2() ) && (out != array_1) && (out != array_2)) {
- out->cardinality =
- difference_vector16(array_1->array, array_1->cardinality,
- array_2->array, array_2->cardinality, out->array);
- } else {
- out->cardinality =
- difference_uint16(array_1->array, array_1->cardinality, array_2->array,
- array_2->cardinality, out->array);
- }
+#if CROARING_IS_X64
+ if ((croaring_hardware_support() & ROARING_SUPPORTS_AVX2) &&
+ (out != array_1) && (out != array_2)) {
+ out->cardinality = difference_vector16(
+ array_1->array, array_1->cardinality, array_2->array,
+ array_2->cardinality, out->array);
+ } else {
+ out->cardinality =
+ difference_uint16(array_1->array, array_1->cardinality,
+ array_2->array, array_2->cardinality, out->array);
+ }
#else
out->cardinality =
difference_uint16(array_1->array, array_1->cardinality, array_2->array,
@@ -10294,15 +12949,15 @@ void array_container_xor(const array_container_t *array_1,
array_container_grow(out, max_cardinality, false);
}
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
- out->cardinality =
- xor_vector16(array_1->array, array_1->cardinality, array_2->array,
- array_2->cardinality, out->array);
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ out->cardinality =
+ xor_vector16(array_1->array, array_1->cardinality, array_2->array,
+ array_2->cardinality, out->array);
} else {
- out->cardinality =
- xor_uint16(array_1->array, array_1->cardinality, array_2->array,
- array_2->cardinality, out->array);
+ out->cardinality =
+ xor_uint16(array_1->array, array_1->cardinality, array_2->array,
+ array_2->cardinality, out->array);
}
#else
out->cardinality =
@@ -10325,14 +12980,14 @@ void array_container_intersection(const array_container_t *array1,
int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
min_card = minimum_int32(card_1, card_2);
const int threshold = 64; // subject to tuning
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
if (out->capacity < min_card) {
- array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
- false);
+ array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
+ false);
}
#else
if (out->capacity < min_card) {
- array_container_grow(out, min_card, false);
+ array_container_grow(out, min_card, false);
}
#endif
@@ -10343,14 +12998,14 @@ void array_container_intersection(const array_container_t *array1,
out->cardinality = intersect_skewed_uint16(
array2->array, card_2, array1->array, card_1, out->array);
} else {
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
- out->cardinality = intersect_vector16(
- array1->array, card_1, array2->array, card_2, out->array);
- } else {
- out->cardinality = intersect_uint16(array1->array, card_1,
- array2->array, card_2, out->array);
- }
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ out->cardinality = intersect_vector16(
+ array1->array, card_1, array2->array, card_2, out->array);
+ } else {
+ out->cardinality = intersect_uint16(
+ array1->array, card_1, array2->array, card_2, out->array);
+ }
#else
out->cardinality = intersect_uint16(array1->array, card_1,
array2->array, card_2, out->array);
@@ -10371,14 +13026,14 @@ int array_container_intersection_cardinality(const array_container_t *array1,
return intersect_skewed_uint16_cardinality(array2->array, card_2,
array1->array, card_1);
} else {
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
- return intersect_vector16_cardinality(array1->array, card_1,
- array2->array, card_2);
- } else {
- return intersect_uint16_cardinality(array1->array, card_1,
- array2->array, card_2);
- }
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ return intersect_vector16_cardinality(array1->array, card_1,
+ array2->array, card_2);
+ } else {
+ return intersect_uint16_cardinality(array1->array, card_1,
+ array2->array, card_2);
+ }
#else
return intersect_uint16_cardinality(array1->array, card_1,
array2->array, card_2);
@@ -10387,19 +13042,19 @@ int array_container_intersection_cardinality(const array_container_t *array1,
}
bool array_container_intersect(const array_container_t *array1,
- const array_container_t *array2) {
+ const array_container_t *array2) {
int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
const int threshold = 64; // subject to tuning
if (card_1 * threshold < card_2) {
- return intersect_skewed_uint16_nonempty(
- array1->array, card_1, array2->array, card_2);
+ return intersect_skewed_uint16_nonempty(array1->array, card_1,
+ array2->array, card_2);
} else if (card_2 * threshold < card_1) {
- return intersect_skewed_uint16_nonempty(
- array2->array, card_2, array1->array, card_1);
+ return intersect_skewed_uint16_nonempty(array2->array, card_2,
+ array1->array, card_1);
} else {
- // we do not bother vectorizing
- return intersect_uint16_nonempty(array1->array, card_1,
- array2->array, card_2);
+ // we do not bother vectorizing
+ return intersect_uint16_nonempty(array1->array, card_1, array2->array,
+ card_2);
}
}
@@ -10408,7 +13063,6 @@ bool array_container_intersect(const array_container_t *array1,
* */
void array_container_intersection_inplace(array_container_t *src_1,
const array_container_t *src_2) {
- // todo: can any of this be vectorized?
int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
const int threshold = 64; // subject to tuning
if (card_1 * threshold < card_2) {
@@ -10418,17 +13072,41 @@ void array_container_intersection_inplace(array_container_t *src_1,
src_1->cardinality = intersect_skewed_uint16(
src_2->array, card_2, src_1->array, card_1, src_1->array);
} else {
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ src_1->cardinality = intersect_vector16_inplace(
+ src_1->array, card_1, src_2->array, card_2);
+ } else {
+ src_1->cardinality = intersect_uint16(
+ src_1->array, card_1, src_2->array, card_2, src_1->array);
+ }
+#else
src_1->cardinality = intersect_uint16(
src_1->array, card_1, src_2->array, card_2, src_1->array);
+#endif
}
}
ALLOW_UNALIGNED
int array_container_to_uint32_array(void *vout, const array_container_t *cont,
uint32_t base) {
- int outpos = 0, i;
+#if CROARING_IS_X64
+ int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if (support & ROARING_SUPPORTS_AVX512) {
+ return avx512_array_container_to_uint32_array(vout, cont->array,
+ cont->cardinality, base);
+ }
+#endif
+ if (support & ROARING_SUPPORTS_AVX2) {
+ return array_container_to_uint32_array_vector16(
+ vout, cont->array, cont->cardinality, base);
+ }
+#endif // CROARING_IS_X64
+ int outpos = 0;
uint32_t *out = (uint32_t *)vout;
- for (i = 0; i < cont->cardinality; ++i) {
+ size_t i = 0;
+ for (; i < (size_t)cont->cardinality; ++i) {
const uint32_t val = base + cont->array[i];
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
@@ -10437,16 +13115,15 @@ int array_container_to_uint32_array(void *vout, const array_container_t *cont,
return outpos;
}
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
void array_container_printf(const array_container_t *v) {
- int i;
- if (v->cardinality == 0) {
+ if (v->cardinality == 0) {
printf("{}");
return;
}
printf("{");
printf("%d", v->array[0]);
- for (i = 1; i < v->cardinality; ++i) {
+ int i; for(i = 1; i < v->cardinality; ++i) {
printf(",%d", v->array[i]);
}
printf("}");
@@ -10454,25 +13131,63 @@ void array_container_printf(const array_container_t *v) {
void array_container_printf_as_uint32_array(const array_container_t *v,
uint32_t base) {
- int i;
-
if (v->cardinality == 0) {
return;
}
printf("%u", v->array[0] + base);
- for (i = 1; i < v->cardinality; ++i) {
+ int i; for(i = 1; i < v->cardinality; ++i) {
printf(",%u", v->array[i] + base);
}
}
#endif
+
+/*
+ * Validate the container. Returns true if valid.
+ */
+bool array_container_validate(const array_container_t *v, const char **reason) {
+ if (v->capacity < 0) {
+ *reason = "negative capacity";
+ return false;
+ }
+ if (v->cardinality < 0) {
+ *reason = "negative cardinality";
+ return false;
+ }
+ if (v->cardinality > v->capacity) {
+ *reason = "cardinality exceeds capacity";
+ return false;
+ }
+ if (v->cardinality > DEFAULT_MAX_SIZE) {
+ *reason = "cardinality exceeds DEFAULT_MAX_SIZE";
+ return false;
+ }
+ if (v->cardinality == 0) {
+ *reason = "zero cardinality";
+ return false;
+ }
+
+ if (v->array == NULL) {
+ *reason = "NULL array pointer";
+ return false;
+ }
+ uint16_t prev = v->array[0];
+ int i; for(i = 1; i < v->cardinality; ++i) {
+ if (v->array[i] <= prev) {
+ *reason = "array elements not strictly increasing";
+ return false;
+ }
+ prev = v->array[i];
+ }
+
+ return true;
+}
/* Compute the number of runs */
int32_t array_container_number_of_runs(const array_container_t *ac) {
// Can SIMD work here?
int32_t nr_runs = 0;
int32_t prev = -2;
- uint16_t *p;
- for (p = ac->array; p != ac->array + ac->cardinality; ++p) {
+ const uint16_t *p; for(p = ac->array; p != ac->array + ac->cardinality; ++p) {
if (*p != prev + 1) nr_runs++;
prev = *p;
}
@@ -10526,8 +13241,7 @@ int32_t array_container_read(int32_t cardinality, array_container_t *container,
bool array_container_iterate(const array_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr) {
- int i;
- for (i = 0; i < cont->cardinality; i++)
+ int i; for(i = 0; i < cont->cardinality; i++)
if (!iterator(cont->array[i] + base, ptr)) return false;
return true;
}
@@ -10535,15 +13249,16 @@ bool array_container_iterate(const array_container_t *cont, uint32_t base,
bool array_container_iterate64(const array_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr) {
- int i;
- for (i = 0; i < cont->cardinality; i++)
+ int i; for(i = 0; i < cont->cardinality; i++)
if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr))
return false;
return true;
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/array.c */
/* begin file src/containers/bitset.c */
@@ -10560,19 +13275,37 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base,
#include <string.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
-extern inline int bitset_container_cardinality(const bitset_container_t *bitset);
-extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
+extern inline int bitset_container_cardinality(
+ const bitset_container_t *bitset);
+extern inline void bitset_container_set(bitset_container_t *bitset,
+ uint16_t pos);
// unused at this time:
-//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
+// extern inline void bitset_container_unset(bitset_container_t *bitset,
+// uint16_t pos);
extern inline bool bitset_container_get(const bitset_container_t *bitset,
uint16_t pos);
extern inline int32_t bitset_container_serialized_size_in_bytes(void);
-extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
-extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_add(bitset_container_t *bitset,
+ uint16_t pos);
+extern inline bool bitset_container_remove(bitset_container_t *bitset,
+ uint16_t pos);
extern inline bool bitset_container_contains(const bitset_container_t *bitset,
uint16_t pos);
@@ -10587,8 +13320,6 @@ void bitset_container_set_all(bitset_container_t *bitset) {
bitset->cardinality = (1 << 16);
}
-
-
/* Create a new bitset. Return NULL in case of failure. */
bitset_container_t *bitset_container_create(void) {
bitset_container_t *bitset =
@@ -10597,9 +13328,20 @@ bitset_container_t *bitset_container_create(void) {
if (!bitset) {
return NULL;
}
- // sizeof(__m256i) == 32
+
+ size_t align_size = 32;
+#if CROARING_IS_X64
+ int support = croaring_hardware_support();
+ if (support & ROARING_SUPPORTS_AVX512) {
+ // sizeof(__m512i) == 64
+ align_size = 64;
+ } else {
+ // sizeof(__m256i) == 32
+ align_size = 32;
+ }
+#endif
bitset->words = (uint64_t *)roaring_aligned_malloc(
- 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
if (!bitset->words) {
roaring_free(bitset);
return NULL;
@@ -10621,13 +13363,11 @@ void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
if (step == 0) return; // refuse to crash
if ((64 % step) == 0) { // step divides 64
uint64_t mask = 0; // construct the repeated mask
- uint32_t value;
- for (value = (min % step); value < 64; value += step) {
+ uint32_t value; for(value = (min % step); value < 64; value += step) {
mask |= ((uint64_t)1 << value);
}
uint32_t firstword = min / 64;
uint32_t endword = (max - 1) / 64;
- uint32_t i;
bitset->cardinality = (max - min + step - 1) / step;
if (firstword == endword) {
bitset->words[firstword] |=
@@ -10636,12 +13376,11 @@ void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
return;
}
bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
- for (i = firstword + 1; i < endword; i++)
+ uint32_t i; for(i = firstword + 1; i < endword; i++)
bitset->words[i] = mask;
bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
} else {
- uint32_t value;
- for (value = min; value < max; value += step) {
+ uint32_t value; for(value = min; value < max; value += step) {
bitset_container_add(bitset, value);
}
}
@@ -10649,14 +13388,16 @@ void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
/* Free memory. */
void bitset_container_free(bitset_container_t *bitset) {
- if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise
- roaring_aligned_free(bitset->words);
- bitset->words = NULL; // pedantic
+ if (bitset->words !=
+ NULL) { // Jon Strabala reports that some tools complain otherwise
+ roaring_aligned_free(bitset->words);
+ bitset->words = NULL; // pedantic
}
roaring_free(bitset);
}
/* duplicate container. */
+ALLOW_UNALIGNED
bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
bitset_container_t *bitset =
(bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
@@ -10664,9 +13405,19 @@ bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
if (!bitset) {
return NULL;
}
- // sizeof(__m256i) == 32
+
+ size_t align_size = 32;
+#if CROARING_IS_X64
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
+ // sizeof(__m512i) == 64
+ align_size = 64;
+ } else {
+ // sizeof(__m256i) == 32
+ align_size = 32;
+ }
+#endif
bitset->words = (uint64_t *)roaring_aligned_malloc(
- 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+ align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
if (!bitset->words) {
roaring_free(bitset);
return NULL;
@@ -10677,9 +13428,8 @@ bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
return bitset;
}
-void bitset_container_offset(const bitset_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset) {
+void bitset_container_offset(const bitset_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset) {
bitset_container_t *bc = NULL;
uint64_t val;
uint16_t b, i, end;
@@ -10691,14 +13441,13 @@ void bitset_container_offset(const bitset_container_t *c,
if (loc != NULL) {
bc = bitset_container_create();
if (i == 0) {
- memcpy(bc->words+b, c->words, 8*end);
+ memcpy(bc->words + b, c->words, 8 * end);
} else {
- uint32_t k;
bc->words[b] = c->words[0] << i;
- for (k = 1; k < end; ++k) {
+ uint32_t k; for(k = 1; k < end; ++k) {
val = c->words[k] << i;
- val |= c->words[k-1] >> (64 - i);
- bc->words[b+k] = val;
+ val |= c->words[k - 1] >> (64 - i);
+ bc->words[b + k] = val;
}
}
@@ -10715,7 +13464,7 @@ void bitset_container_offset(const bitset_container_t *c,
// Both hic and loc can't be NULL, so bc is never NULL here
if (bc->cardinality == 0) {
bitset_container_free(bc);
- }
+ }
return;
}
@@ -10724,22 +13473,20 @@ void bitset_container_offset(const bitset_container_t *c,
}
if (i == 0) {
- memcpy(bc->words, c->words+end, 8*b);
+ memcpy(bc->words, c->words + end, 8 * b);
} else {
- uint32_t k;
-
- for ( k = end; k < 1024; ++k) {
+ uint32_t k; for(k = end; k < 1024; ++k) {
val = c->words[k] << i;
- val |= c->words[k-1] >> (64 - i);
- bc->words[k-end] = val;
+ val |= c->words[k - 1] >> (64 - i);
+ bc->words[k - end] = val;
}
bc->words[b] = c->words[1023] >> (64 - i);
}
bc->cardinality = bitset_container_compute_cardinality(bc);
if (bc->cardinality == 0) {
- bitset_container_free(bc);
- return;
+ bitset_container_free(bc);
+ return;
}
*hic = bc;
}
@@ -10751,55 +13498,63 @@ void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
bitset_container_compute_cardinality(bitset); // could be smarter
}
-
bool bitset_container_intersect(const bitset_container_t *src_1,
- const bitset_container_t *src_2) {
- // could vectorize, but this is probably already quite fast in practice
- const uint64_t * __restrict__ words_1 = src_1->words;
- const uint64_t * __restrict__ words_2 = src_2->words;
- int i;
- for ( i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
- if((words_1[i] & words_2[i]) != 0) return true;
+ const bitset_container_t *src_2) {
+ // could vectorize, but this is probably already quite fast in practice
+ const uint64_t *__restrict__ words_1 = src_1->words;
+ const uint64_t *__restrict__ words_2 = src_2->words;
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) {
+ if ((words_1[i] & words_2[i]) != 0) return true;
}
return false;
}
-
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
#ifndef WORDS_IN_AVX2_REG
#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
#endif
+#ifndef WORDS_IN_AVX512_REG
+#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
+#endif
/* Get the number of bits set (force computation) */
-static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- const uint64_t *words = bitset->words;
- int32_t sum = 0;
- for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
- sum += hamming(words[i]);
- sum += hamming(words[i + 1]);
- sum += hamming(words[i + 2]);
- sum += hamming(words[i + 3]);
- }
- return sum;
+static inline int _scalar_bitset_container_compute_cardinality(
+ const bitset_container_t *bitset) {
+ const uint64_t *words = bitset->words;
+ int32_t sum = 0;
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
+ sum += roaring_hamming(words[i]);
+ sum += roaring_hamming(words[i + 1]);
+ sum += roaring_hamming(words[i + 2]);
+ sum += roaring_hamming(words[i + 3]);
+ }
+ return sum;
}
/* Get the number of bits set (force computation) */
int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
- if( croaring_avx2() ) {
- return (int) avx2_harley_seal_popcount256(
- (const __m256i *)bitset->words,
- BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
- } else {
- return _scalar_bitset_container_compute_cardinality(bitset);
-
- }
+ int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if (support & ROARING_SUPPORTS_AVX512) {
+ return (int)avx512_vpopcount(
+ (const __m512i *)bitset->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));
+ } else
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+ if (support & ROARING_SUPPORTS_AVX2) {
+ return (int)avx2_harley_seal_popcount256(
+ (const __m256i *)bitset->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
+ } else {
+ return _scalar_bitset_container_compute_cardinality(bitset);
+ }
}
-#elif defined(USENEON)
+#elif defined(CROARING_USENEON)
int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
uint16x8_t n0 = vdupq_n_u16(0);
uint16x8_t n1 = vdupq_n_u16(0);
uint16x8_t n2 = vdupq_n_u16(0);
uint16x8_t n3 = vdupq_n_u16(0);
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {
uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]);
n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));
uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]);
@@ -10817,26 +13572,184 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);
}
-#else // CROARING_IS_X64
+#else // CROARING_IS_X64
/* Get the number of bits set (force computation) */
int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
const uint64_t *words = bitset->words;
- int32_t sum = 0, i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
- sum += hamming(words[i]);
- sum += hamming(words[i + 1]);
- sum += hamming(words[i + 2]);
- sum += hamming(words[i + 3]);
+ int32_t sum = 0;
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
+ sum += roaring_hamming(words[i]);
+ sum += roaring_hamming(words[i + 1]);
+ sum += roaring_hamming(words[i + 2]);
+ sum += roaring_hamming(words[i + 3]);
}
return sum;
}
-#endif // CROARING_IS_X64
+#endif // CROARING_IS_X64
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
#define BITSET_CONTAINER_FN_REPEAT 8
+#ifndef WORDS_IN_AVX512_REG
+#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
+#endif // WORDS_IN_AVX512_REG
+
+/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
+ result to bitsetout */
+// clang-format off
+#define AVX512_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ static inline int _avx512_bitset_container_##opname##_nocard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint8_t * __restrict__ words_1 = (const uint8_t *)src_1->words; \
+ const uint8_t * __restrict__ words_2 = (const uint8_t *)src_2->words; \
+ /* not using the blocking optimization for some reason*/ \
+ uint8_t *out = (uint8_t*)dst->words; \
+ const int innerloop = 8; \
+ size_t i; for(i = 0; \
+ i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG); \
+ i+=innerloop) { \
+ __m512i A1, A2, AO; \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)out, AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 64)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 64)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+64), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 128)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 128)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+128), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 192)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 192)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+192), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 256)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 256)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+256), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 320)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 320)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+320), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 384)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 384)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+384), AO); \
+ A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 448)); \
+ A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 448)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm512_storeu_si512((__m512i *)(out+448), AO); \
+ out+=512; \
+ words_1 += 512; \
+ words_2 += 512; \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+ }
+
+#define AVX512_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ /* next, a version that updates cardinality*/ \
+ static inline int _avx512_bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const __m512i * __restrict__ words_1 = (const __m512i *) src_1->words; \
+ const __m512i * __restrict__ words_2 = (const __m512i *) src_2->words; \
+ __m512i *out = (__m512i *) dst->words; \
+ dst->cardinality = (int32_t)avx512_harley_seal_popcount512andstore_##opname(words_2,\
+ words_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG)); \
+ return dst->cardinality; \
+ }
+
+#define AVX512_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ /* next, a version that just computes the cardinality*/ \
+ static inline int _avx512_bitset_container_##opname##_justcard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2) { \
+ const __m512i * __restrict__ data1 = (const __m512i *) src_1->words; \
+ const __m512i * __restrict__ data2 = (const __m512i *) src_2->words; \
+ return (int)avx512_harley_seal_popcount512_##opname(data2, \
+ data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG)); \
+ }
+
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+
#ifndef WORDS_IN_AVX2_REG
#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
#endif // WORDS_IN_AVX2_REG
@@ -10857,7 +13770,7 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
/* not using the blocking optimization for some reason*/ \
uint8_t *out = (uint8_t *)dst->words; \
const int innerloop = 8; \
- for (size_t i = 0; \
+ size_t i; for(i = 0; \
i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \
i += innerloop) { \
__m256i A1, A2, AO; \
@@ -10930,72 +13843,72 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
// we duplicate the function because other containers use the "or" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
// we duplicate the function because other containers use the "intersection" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
// we duplicate the function because other containers use the "or" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
// we duplicate the function because other containers use the "intersection" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
// we duplicate the function because other containers use the "or" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
// we duplicate the function because other containers use the "intersection" term, makes API more consistent
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \
@@ -11007,13 +13920,13 @@ CROARING_UNTARGET_REGION
const uint64_t *__restrict__ words_2 = src_2->words; \
uint64_t *out = dst->words; \
int32_t sum = 0; \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
out[i] = word_1; \
out[i + 1] = word_2; \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
+ sum += roaring_hamming(word_1); \
+ sum += roaring_hamming(word_2); \
} \
dst->cardinality = sum; \
return dst->cardinality; \
@@ -11024,7 +13937,7 @@ CROARING_UNTARGET_REGION
const uint64_t *__restrict__ words_1 = src_1->words; \
const uint64_t *__restrict__ words_2 = src_2->words; \
uint64_t *out = dst->words; \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
out[i] = (words_1[i])opsymbol(words_2[i]); \
} \
dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
@@ -11035,11 +13948,11 @@ CROARING_UNTARGET_REGION
const uint64_t *__restrict__ words_1 = src_1->words; \
const uint64_t *__restrict__ words_2 = src_2->words; \
int32_t sum = 0; \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
+ sum += roaring_hamming(word_1); \
+ sum += roaring_hamming(word_2); \
} \
return sum; \
}
@@ -11055,12 +13968,16 @@ SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
-
+#if CROARING_COMPILER_SUPPORTS_AVX512
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
int bitset_container_##opname(const bitset_container_t *src_1, \
const bitset_container_t *src_2, \
bitset_container_t *dst) { \
- if ( croaring_avx2() ) { \
+ int support = croaring_hardware_support(); \
+ if ( support & ROARING_SUPPORTS_AVX512 ) { \
+ return _avx512_bitset_container_##opname(src_1, src_2, dst); \
+ } \
+ else if ( support & ROARING_SUPPORTS_AVX2 ) { \
return _avx2_bitset_container_##opname(src_1, src_2, dst); \
} else { \
return _scalar_bitset_container_##opname(src_1, src_2, dst); \
@@ -11069,7 +13986,11 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
const bitset_container_t *src_2, \
bitset_container_t *dst) { \
- if ( croaring_avx2() ) { \
+ int support = croaring_hardware_support(); \
+ if ( support & ROARING_SUPPORTS_AVX512 ) { \
+ return _avx512_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+ } \
+ else if ( support & ROARING_SUPPORTS_AVX2 ) { \
return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \
} else { \
return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \
@@ -11077,17 +13998,51 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
} \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
const bitset_container_t *src_2) { \
- if ((croaring_detect_supported_architectures() & CROARING_AVX2) == \
- CROARING_AVX2) { \
+ int support = croaring_hardware_support(); \
+ if ( support & ROARING_SUPPORTS_AVX512 ) { \
+ return _avx512_bitset_container_##opname##_justcard(src_1, src_2); \
+ } \
+ else if ( support & ROARING_SUPPORTS_AVX2 ) { \
return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \
} else { \
return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \
} \
}
+#else // CROARING_COMPILER_SUPPORTS_AVX512
-#elif defined(USENEON)
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
+ int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \
+ return _avx2_bitset_container_##opname(src_1, src_2, dst); \
+ } else { \
+ return _scalar_bitset_container_##opname(src_1, src_2, dst); \
+ } \
+ } \
+ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \
+ return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+ } else { \
+ return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+ } \
+ } \
+ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \
+ return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \
+ } else { \
+ return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \
+ } \
+ }
+
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+
+#elif defined(CROARING_USENEON)
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
int bitset_container_##opname(const bitset_container_t *src_1, \
@@ -11100,7 +14055,7 @@ int bitset_container_##opname(const bitset_container_t *src_1, \
uint16x8_t n1 = vdupq_n_u16(0); \
uint16x8_t n2 = vdupq_n_u16(0); \
uint16x8_t n3 = vdupq_n_u16(0); \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
vld1q_u64(&words_2[i + 0])); \
n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
@@ -11132,7 +14087,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
const uint64_t * __restrict__ words_1 = src_1->words; \
const uint64_t * __restrict__ words_2 = src_2->words; \
uint64_t *out = dst->words; \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
vld1q_u64(&words_2[i + 0]))); \
vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
@@ -11153,7 +14108,7 @@ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
uint16x8_t n1 = vdupq_n_u16(0); \
uint16x8_t n2 = vdupq_n_u16(0); \
uint16x8_t n3 = vdupq_n_u16(0); \
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
vld1q_u64(&words_2[i + 0])); \
n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
@@ -11185,14 +14140,13 @@ int bitset_container_##opname(const bitset_container_t *src_1, \
const uint64_t * __restrict__ words_2 = src_2->words; \
uint64_t *out = dst->words; \
int32_t sum = 0; \
- size_t i; \
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
out[i] = word_1; \
out[i + 1] = word_2; \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
+ sum += roaring_hamming(word_1); \
+ sum += roaring_hamming(word_2); \
} \
dst->cardinality = sum; \
return dst->cardinality; \
@@ -11203,8 +14157,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
const uint64_t * __restrict__ words_1 = src_1->words; \
const uint64_t * __restrict__ words_2 = src_2->words; \
uint64_t *out = dst->words; \
- size_t i; \
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
out[i] = (words_1[i])opsymbol(words_2[i]); \
} \
dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
@@ -11212,15 +14165,14 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
} \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
const bitset_container_t *src_2) { \
- const uint64_t * __restrict__ words_1 = src_1->words; \
+ /* printf("A1\n"); */ const uint64_t * __restrict__ words_1 = src_1->words; \
const uint64_t * __restrict__ words_2 = src_2->words; \
int32_t sum = 0; \
- size_t i; \
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
- sum += hamming(word_1); \
- sum += hamming(word_2); \
+ sum += roaring_hamming(word_1); \
+ sum += roaring_hamming(word_2); \
} \
return sum; \
}
@@ -11246,8 +14198,15 @@ int bitset_container_to_uint32_array(
const bitset_container_t *bc,
uint32_t base
){
-#ifdef CROARING_IS_X64
- if(( croaring_avx2() ) && (bc->cardinality >= 8192)) // heuristic
+#if CROARING_IS_X64
+ int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if(( support & ROARING_SUPPORTS_AVX512 ) && (bc->cardinality >= 8192)) // heuristic
+ return (int) bitset_extract_setbits_avx512(bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
+ else
+#endif
+ if(( support & ROARING_SUPPORTS_AVX2 ) && (bc->cardinality >= 8192)) // heuristic
return (int) bitset_extract_setbits_avx2(bc->words,
BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
else
@@ -11259,20 +14218,19 @@ int bitset_container_to_uint32_array(
#endif
}
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/*
* Print this container using printf (useful for debugging).
*/
void bitset_container_printf(const bitset_container_t * v) {
- printf("{");
+ printf("{");
uint32_t base = 0;
bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
- int i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
uint64_t w = v->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
if(iamfirst) {// predicted to be false
printf("%u",base + r);
iamfirst = false;
@@ -11286,18 +14244,16 @@ void bitset_container_printf(const bitset_container_t * v) {
printf("}");
}
-
/*
* Print this container using printf as a comma-separated list of 32-bit integers starting at base.
*/
void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
- bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
- int i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
uint64_t w = v->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
if(iamfirst) {// predicted to be false
printf("%u", r + base);
iamfirst = false;
@@ -11310,21 +14266,45 @@ void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint3
}
}
#endif
+
+/*
+ * Validate the container. Returns true if valid.
+ *
+ * On failure, returns false and points *reason at a string literal naming
+ * the first check that failed; *reason is left untouched on success.
+ * Checks, in order: words is non-NULL, the stored cardinality equals the
+ * value recomputed by bitset_container_compute_cardinality, and the
+ * cardinality is strictly greater than DEFAULT_MAX_SIZE.
+ */
+bool bitset_container_validate(const bitset_container_t *v, const char **reason) {
+ if (v->words == NULL) {
+ *reason = "words is NULL";
+ return false;
+ }
+ if (v->cardinality != bitset_container_compute_cardinality(v)) { // stored count must match a fresh recount
+ *reason = "cardinality is incorrect";
+ return false;
+ }
+ if (v->cardinality <= DEFAULT_MAX_SIZE) { // cardinalities up to DEFAULT_MAX_SIZE are rejected for this container kind
+ *reason = "cardinality is too small for a bitmap container";
+ return false;
+ }
+ // Attempt to forcibly load the first and last words, hopefully causing
+ // a segfault or an address sanitizer error if words is not allocated.
+ volatile uint64_t *words = v->words; // volatile so the two reads below are not optimized away
+ (void) words[0];
+ (void) words[BITSET_CONTAINER_SIZE_IN_WORDS - 1];
+ return true;
+}
// TODO: use the fast lower bound, also
int bitset_container_number_of_runs(bitset_container_t *bc) {
- int num_runs = 0, i;
+ int num_runs = 0;
uint64_t next_word = bc->words[0];
-
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
+
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
uint64_t word = next_word;
next_word = bc->words[i+1];
- num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
+ num_runs += roaring_hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
}
uint64_t word = next_word;
- num_runs += hamming((~word) & (word << 1));
+ num_runs += roaring_hamming((~word) & (word << 1));
if((word & 0x8000000000000000ULL) != 0)
num_runs++;
return num_runs;
@@ -11346,12 +14326,11 @@ int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container
}
bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
- int32_t i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ int32_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
uint64_t w = cont->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
if(!iterator(r + base, ptr)) return false;
w ^= t;
}
@@ -11361,12 +14340,11 @@ bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roa
}
bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
- int32_t i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ int32_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
uint64_t w = cont->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
w ^= t;
}
@@ -11375,13 +14353,31 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r
return true;
}
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+static inline bool _avx512_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { // true iff both word arrays hold identical bytes
+ const __m512i *ptr1 = (const __m512i*)container1->words; // view the 64-bit words as 512-bit (64-byte) lanes
+ const __m512i *ptr2 = (const __m512i*)container2->words;
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/64; i++) { // one iteration per 64-byte chunk
+ __m512i r1 = _mm512_loadu_si512(ptr1+i); // unaligned loads: words is not assumed to be 64-byte aligned
+ __m512i r2 = _mm512_loadu_si512(ptr2+i);
+ __mmask64 mask = _mm512_cmpeq_epi8_mask(r1, r2); // one mask bit per byte, set when the bytes are equal
+ if ((uint64_t)mask != UINT64_MAX) { // any cleared bit means at least one byte differs
+ return false;
+ }
+ }
+ return true;
+}
+CROARING_UNTARGET_AVX512
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
CROARING_TARGET_AVX2
ALLOW_UNALIGNED
static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
const __m256i *ptr1 = (const __m256i*)container1->words;
const __m256i *ptr2 = (const __m256i*)container2->words;
- for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
+ size_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
__m256i r1 = _mm256_loadu_si256(ptr1+i);
__m256i r2 = _mm256_loadu_si256(ptr2+i);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
@@ -11391,7 +14387,7 @@ static inline bool _avx2_bitset_container_equals(const bitset_container_t *conta
}
return true;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
#endif // CROARING_IS_X64
ALLOW_UNALIGNED
@@ -11404,8 +14400,15 @@ bool bitset_container_equals(const bitset_container_t *container1, const bitset_
return true;
}
}
-#ifdef CROARING_IS_X64
- if( croaring_avx2() ) {
+#if CROARING_IS_X64
+ int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if( support & ROARING_SUPPORTS_AVX512 ) {
+ return _avx512_bitset_container_equals(container1, container2);
+ }
+ else
+#endif
+ if( support & ROARING_SUPPORTS_AVX2 ) {
return _avx2_bitset_container_equals(container1, container2);
}
#endif
@@ -11438,15 +14441,14 @@ bool bitset_container_select(const bitset_container_t *container, uint32_t *star
}
const uint64_t *words = container->words;
int32_t size;
- int i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
- size = hamming(words[i]);
+ int i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
+ size = roaring_hamming(words[i]);
if(rank <= *start_rank + size) {
uint64_t w = container->words[i];
uint16_t base = i*64;
while (w != 0) {
uint64_t t = w & (~w + 1);
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
if(*start_rank == rank) {
*element = r+base;
return true;
@@ -11459,17 +14461,16 @@ bool bitset_container_select(const bitset_container_t *container, uint32_t *star
*start_rank += size;
}
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
/* Returns the smallest value (assumes not empty) */
uint16_t bitset_container_minimum(const bitset_container_t *container) {
- int32_t i;
- for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+ int32_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
uint64_t w = container->words[i];
if (w != 0) {
- int r = __builtin_ctzll(w);
+ int r = roaring_trailing_zeroes(w);
return r + i * 64;
}
}
@@ -11478,11 +14479,10 @@ uint16_t bitset_container_minimum(const bitset_container_t *container) {
/* Returns the largest value (assumes not empty) */
uint16_t bitset_container_maximum(const bitset_container_t *container) {
- int32_t i;
- for (i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
+ int32_t i; for(i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
uint64_t w = container->words[i];
if (w != 0) {
- int r = __builtin_clzll(w);
+ int r = roaring_leading_zeroes(w);
return i * 64 + 63 - r;
}
}
@@ -11493,17 +14493,60 @@ uint16_t bitset_container_maximum(const bitset_container_t *container) {
int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
// credit: aqrit
int sum = 0;
- int i = 0, end;
- for (end = x / 64; i < end; i++){
- sum += hamming(container->words[i]);
+ int i = 0;
+ int end; for(end = x / 64; i < end; i++){
+ sum += roaring_hamming(container->words[i]);
}
uint64_t lastword = container->words[i];
uint64_t lastpos = UINT64_C(1) << (x % 64);
uint64_t mask = lastpos + lastpos - 1; // smear right
- sum += hamming(lastword & mask);
+ sum += roaring_hamming(lastword & mask);
return sum;
}
+/*
+ * Computes ranks for a batch of 32-bit values that fall into this container.
+ *
+ * Walks [begin, end) and, for every value whose upper 16 bits equal those of
+ * *begin, writes start_rank plus the number of set bits at positions <= the
+ * value's lower 16 bits into the next slot of ans. Stops at the first value
+ * whose upper 16 bits differ (it belongs to another container).
+ *
+ * Returns the number of values consumed, which is also the number of entries
+ * written to ans. An empty range consumes nothing and returns 0.
+ *
+ * NOTE(review): the word index and the running popcount only move forward,
+ * so the values are presumably expected in ascending order -- confirm with
+ * the callers.
+ */
+uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
+ // Guard the empty range: *begin must not be read when begin == end.
+ if (begin == end) return 0;
+ const uint16_t high = (uint16_t)((*begin) >> 16);
+ int i = 0; // index of the word holding the current value
+ int sum = 0; // set bits in words[0 .. i-1], carried across values
+ const uint32_t* iter = begin;
+ for(; iter != end; iter++) {
+ uint32_t x = *iter;
+ uint16_t xhigh = (uint16_t)(x >> 16);
+ if(xhigh != high) return (uint32_t)(iter - begin); // stop at next container
+
+ uint16_t xlow = (uint16_t)x;
+ int count;
+ for(count = xlow / 64; i < count; i++){
+ sum += roaring_hamming(container->words[i]);
+ }
+ uint64_t lastword = container->words[i];
+ uint64_t lastpos = UINT64_C(1) << (xlow % 64);
+ uint64_t mask = lastpos + lastpos - 1; // smear right
+ *(ans++) = start_rank + sum + roaring_hamming(lastword & mask);
+ }
+ return (uint32_t)(iter - begin);
+}
+
+
+/*
+ * Returns the zero-based index of x among the values stored in the
+ * container, or -1 if x is not present.
+ */
+int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
+ if (!bitset_container_get(container, x)) {
+ return -1;
+ }
+ // bitset_container_rank counts the set bits at positions <= x. With x
+ // present that count is at least 1, so rank - 1 is the position of x.
+ return bitset_container_rank(container, x) - 1;
+}
+
/* Returns the index of the first value equal or larger than x, or -1 */
int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
uint32_t x32 = x;
@@ -11516,48 +14559,59 @@ int bitset_container_index_equalorlarger(const bitset_container_t *container, ui
if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
word = container->words[k];
}
- return k * 64 + __builtin_ctzll(word);
+ return k * 64 + roaring_trailing_zeroes(word);
}
#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
#endif
-/* end file src/containers/bitset.c */
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif/* end file src/containers/bitset.c */
/* begin file src/containers/containers.c */
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+// With the Windows MSVC C++ compiler, (type){init} does not compile:
+// it causes C4576: a parenthesized type followed by an initializer list is a
+// non-standard explicit type conversion syntax. The correct syntax is type{init}.
+#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T roaring_container_iterator_t
+namespace roaring {
+namespace internal {
+#else
+#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T (roaring_container_iterator_t)
#endif
+static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { // returns the smaller of a and b
+ return (a < b) ? a : b;
+}
+
extern inline const container_t *container_unwrap_shared(
- const container_t *candidate_shared_container, uint8_t *type);
+ const container_t *candidate_shared_container, uint8_t *type);
extern inline container_t *container_mutable_unwrap_shared(
- container_t *candidate_shared_container, uint8_t *type);
+ container_t *candidate_shared_container, uint8_t *type);
-extern inline int container_get_cardinality(
- const container_t *c, uint8_t typecode);
+extern inline int container_get_cardinality(const container_t *c,
+ uint8_t typecode);
-extern inline container_t *container_iand(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+extern inline container_t *container_iand(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
-extern inline container_t *container_ior(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+extern inline container_t *container_ior(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
-extern inline container_t *container_ixor(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+extern inline container_t *container_ixor(container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
-extern inline container_t *container_iandnot(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+extern inline container_t *container_iandnot(container_t *c1, uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type);
void container_free(container_t *c, uint8_t type) {
switch (type) {
@@ -11575,16 +14629,12 @@ void container_free(container_t *c, uint8_t type) {
break;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
}
#ifdef NDPI_ENABLE_DEBUG_MESSAGES
-void run_container_printf(const run_container_t *cont);
-void run_container_printf_as_uint32_array(const run_container_t *cont,
- uint32_t base);
-
-void container_printf(const container_t *c, uint8_t type) {
+ void container_printf(const container_t *c, uint8_t type) {
c = container_unwrap_shared(c, &type);
switch (type) {
case BITSET_CONTAINER_TYPE:
@@ -11597,77 +14647,103 @@ void container_printf(const container_t *c, uint8_t type) {
run_container_printf(const_CAST_run(c));
return;
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
}
-void container_printf_as_uint32_array(
- const container_t *c, uint8_t typecode,
- uint32_t base
-){
+void container_printf_as_uint32_array(const container_t *c, uint8_t typecode,
+ uint32_t base) {
c = container_unwrap_shared(c, &typecode);
switch (typecode) {
case BITSET_CONTAINER_TYPE:
- bitset_container_printf_as_uint32_array(
- const_CAST_bitset(c), base);
+ bitset_container_printf_as_uint32_array(const_CAST_bitset(c), base);
return;
case ARRAY_CONTAINER_TYPE:
- array_container_printf_as_uint32_array(
- const_CAST_array(c), base);
+ array_container_printf_as_uint32_array(const_CAST_array(c), base);
return;
case RUN_CONTAINER_TYPE:
- run_container_printf_as_uint32_array(
- const_CAST_run(c), base);
+ run_container_printf_as_uint32_array(const_CAST_run(c), base);
return;
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
}
#endif
+
+bool container_internal_validate(const container_t *container, uint8_t typecode,
+ const char **reason) { /* Validates a container of the given typecode.
+ * Returns true if every check passes. Otherwise returns false with *reason
+ * pointing at a string describing the failure, set either here or by the
+ * type-specific validator that the call is forwarded to. */
+ if (container == NULL) {
+ *reason = "container is NULL";
+ return false;
+ }
+ // Not using container_unwrap_shared because it asserts if shared containers
+ // are nested
+ if (typecode == SHARED_CONTAINER_TYPE) {
+ const shared_container_t *shared_container =
+ const_CAST_shared(container);
+ if (croaring_refcount_get(&shared_container->counter) == 0) { // a shared wrapper with a zero count is reported as invalid
+ *reason = "shared container has zero refcount";
+ return false;
+ }
+ if (shared_container->typecode == SHARED_CONTAINER_TYPE) { // the wrapped container must not itself be shared
+ *reason = "shared container is nested";
+ return false;
+ }
+ if (shared_container->container == NULL) {
+ *reason = "shared container has NULL container";
+ return false;
+ }
+ container = shared_container->container; // continue validation on the wrapped container
+ typecode = shared_container->typecode;
+ }
+ switch (typecode) { // dispatch to the validator for the concrete container type
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_validate(const_CAST_bitset(container),
+ reason);
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_validate(const_CAST_array(container),
+ reason);
+ case RUN_CONTAINER_TYPE:
+ return run_container_validate(const_CAST_run(container), reason);
+ default:
+ *reason = "invalid typecode";
+ return false;
+ }
+}
-extern inline bool container_nonzero_cardinality(
- const container_t *c, uint8_t typecode);
-
-extern inline int container_to_uint32_array(
- uint32_t *output,
- const container_t *c, uint8_t typecode,
- uint32_t base);
-
-extern inline container_t *container_add(
- container_t *c,
- uint16_t val,
- uint8_t typecode, // !!! 2nd arg?
- uint8_t *new_typecode);
-
-extern inline bool container_contains(
- const container_t *c,
- uint16_t val,
- uint8_t typecode); // !!! 2nd arg?
-
-extern inline container_t *container_and(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
-
-extern inline container_t *container_or(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
-
-extern inline container_t *container_xor(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
-
-container_t *get_copy_of_container(
- container_t *c, uint8_t *typecode,
- bool copy_on_write
-){
+extern inline bool container_nonzero_cardinality(const container_t *c,
+ uint8_t typecode);
+
+extern inline int container_to_uint32_array(uint32_t *output,
+ const container_t *c,
+ uint8_t typecode, uint32_t base);
+
+extern inline container_t *container_add(container_t *c, uint16_t val,
+ uint8_t typecode, // !!! 2nd arg?
+ uint8_t *new_typecode);
+
+extern inline bool container_contains(const container_t *c, uint16_t val,
+ uint8_t typecode); // !!! 2nd arg?
+
+extern inline container_t *container_and(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_or(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_xor(const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+container_t *get_copy_of_container(container_t *c, uint8_t *typecode,
+ bool copy_on_write) {
if (copy_on_write) {
shared_container_t *shared_container;
if (*typecode == SHARED_CONTAINER_TYPE) {
shared_container = CAST_shared(c);
- shared_container->counter += 1;
+ croaring_refcount_inc(&shared_container->counter);
return shared_container;
}
assert(*typecode != SHARED_CONTAINER_TYPE);
@@ -11679,7 +14755,10 @@ container_t *get_copy_of_container(
shared_container->container = c;
shared_container->typecode = *typecode;
-
+ // At this point, we are creating a new shared container
+ // so there should be no other references, and setting
+ // the counter to 2 - even non-atomically - is safe as
+ // long as the value is set before the return statement.
shared_container->counter = 2;
*typecode = SHARED_CONTAINER_TYPE;
@@ -11706,24 +14785,22 @@ container_t *container_clone(const container_t *c, uint8_t typecode) {
case RUN_CONTAINER_TYPE:
return run_container_clone(const_CAST_run(c));
case SHARED_CONTAINER_TYPE:
- // Shared containers are not cloneable. Are you mixing COW and non-COW bitmaps?
+ // Shared containers are not cloneable. Are you mixing COW and
+ // non-COW bitmaps?
return NULL;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
-container_t *shared_container_extract_copy(
- shared_container_t *sc, uint8_t *typecode
-){
- assert(sc->counter > 0);
+container_t *shared_container_extract_copy(shared_container_t *sc,
+ uint8_t *typecode) {
assert(sc->typecode != SHARED_CONTAINER_TYPE);
- sc->counter--;
*typecode = sc->typecode;
container_t *answer;
- if (sc->counter == 0) {
+ if (croaring_refcount_dec(&sc->counter)) {
answer = sc->container;
sc->container = NULL; // paranoid
roaring_free(sc);
@@ -11735,9 +14812,7 @@ container_t *shared_container_extract_copy(
}
void shared_container_free(shared_container_t *container) {
- assert(container->counter > 0);
- container->counter--;
- if (container->counter == 0) {
+ if (croaring_refcount_dec(&container->counter)) {
assert(container->typecode != SHARED_CONTAINER_TYPE);
container_free(container->container, container->typecode);
container->container = NULL; // paranoid
@@ -11745,70 +14820,500 @@ void shared_container_free(shared_container_t *container) {
}
}
-extern inline container_t *container_not(
- const container_t *c1, uint8_t type1,
- uint8_t *result_type);
+extern inline container_t *container_not(const container_t *c1, uint8_t type1,
+ uint8_t *result_type);
-extern inline container_t *container_not_range(
- const container_t *c1, uint8_t type1,
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type);
+extern inline container_t *container_not_range(const container_t *c1,
+ uint8_t type1,
+ uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type);
-extern inline container_t *container_inot(
- container_t *c1, uint8_t type1,
- uint8_t *result_type);
+extern inline container_t *container_inot(container_t *c1, uint8_t type1,
+ uint8_t *result_type);
-extern inline container_t *container_inot_range(
- container_t *c1, uint8_t type1,
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type);
+extern inline container_t *container_inot_range(container_t *c1, uint8_t type1,
+ uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type);
-extern inline container_t *container_range_of_ones(
- uint32_t range_start, uint32_t range_end,
- uint8_t *result_type);
+extern inline container_t *container_range_of_ones(uint32_t range_start,
+ uint32_t range_end,
+ uint8_t *result_type);
// where are the corresponding things for union and intersection??
-extern inline container_t *container_lazy_xor(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+extern inline container_t *container_lazy_xor(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_lazy_ixor(container_t *c1, uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_andnot(const container_t *c1,
+ uint8_t type1,
+ const container_t *c2,
+ uint8_t type2,
+ uint8_t *result_type);
+
+roaring_container_iterator_t container_init_iterator(const container_t *c,
+ uint8_t typecode,
+ uint16_t *value) {
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ uint32_t wordindex = 0;
+ uint64_t word;
+ while ((word = bc->words[wordindex]) == 0) {
+ wordindex++;
+ }
+ // word is non-zero
+ int32_t index = wordindex * 64 + roaring_trailing_zeroes(word);
+ *value = index;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = index,
+ };
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ *value = ac->array[0];
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = 0,
+ };
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(c);
+ *value = rc->runs[0].value;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = 0,
+ };
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{0};
+ }
+}
-extern inline container_t *container_lazy_ixor(
- container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+roaring_container_iterator_t container_init_iterator_last(const container_t *c,
+ uint8_t typecode,
+ uint16_t *value) {
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
+ uint64_t word;
+ while ((word = bc->words[wordindex]) == 0) {
+ wordindex--;
+ }
+ // word is non-zero
+ int32_t index =
+ wordindex * 64 + (63 - roaring_leading_zeroes(word));
+ *value = index;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = index,
+ };
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ int32_t index = ac->cardinality - 1;
+ *value = ac->array[index];
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = index,
+ };
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(c);
+ int32_t run_index = rc->n_runs - 1;
+ const rle16_t *last_run = &rc->runs[run_index];
+ *value = last_run->value + last_run->length;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
+ .index = run_index,
+ };
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{0};
+ }
+}
+
+bool container_iterator_next(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint16_t *value) {
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ it->index++;
+
+ uint32_t wordindex = it->index / 64;
+ if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) {
+ return false;
+ }
+
+ uint64_t word =
+ bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
+ // next part could be optimized/simplified
+ while (word == 0 &&
+ (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ if (word != 0) {
+ it->index = wordindex * 64 + roaring_trailing_zeroes(word);
+ *value = it->index;
+ return true;
+ }
+ return false;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ it->index++;
+ if (it->index < ac->cardinality) {
+ *value = ac->array[it->index];
+ return true;
+ }
+ return false;
+ }
+ case RUN_CONTAINER_TYPE: {
+ if (*value == UINT16_MAX) { // Avoid overflow to zero
+ return false;
+ }
-extern inline container_t *container_andnot(
- const container_t *c1, uint8_t type1,
- const container_t *c2, uint8_t type2,
- uint8_t *result_type);
+ const run_container_t *rc = const_CAST_run(c);
+ uint32_t limit =
+ rc->runs[it->index].value + rc->runs[it->index].length;
+ if (*value < limit) {
+ (*value)++;
+ return true;
+ }
+
+ it->index++;
+ if (it->index < rc->n_runs) {
+ *value = rc->runs[it->index].value;
+ return true;
+ }
+ return false;
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return false;
+ }
+}
+
+bool container_iterator_prev(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint16_t *value) {
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ if (--it->index < 0) {
+ return false;
+ }
+
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ int32_t wordindex = it->index / 64;
+ uint64_t word =
+ bc->words[wordindex] & (UINT64_MAX >> (63 - (it->index % 64)));
+
+ while (word == 0 && --wordindex >= 0) {
+ word = bc->words[wordindex];
+ }
+ if (word == 0) {
+ return false;
+ }
+
+ it->index = (wordindex * 64) + (63 - roaring_leading_zeroes(word));
+ *value = it->index;
+ return true;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ if (--it->index < 0) {
+ return false;
+ }
+ const array_container_t *ac = const_CAST_array(c);
+ *value = ac->array[it->index];
+ return true;
+ }
+ case RUN_CONTAINER_TYPE: {
+ if (*value == 0) {
+ return false;
+ }
+
+ const run_container_t *rc = const_CAST_run(c);
+ (*value)--;
+ if (*value >= rc->runs[it->index].value) {
+ return true;
+ }
+
+ if (--it->index < 0) {
+ return false;
+ }
+
+ *value = rc->runs[it->index].value + rc->runs[it->index].length;
+ return true;
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return false;
+ }
+}
+
+bool container_iterator_lower_bound(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint16_t *value_out, uint16_t val) {
+ if (val > container_maximum(c, typecode)) {
+ return false;
+ }
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ it->index = bitset_container_index_equalorlarger(bc, val);
+ *value_out = it->index;
+ return true;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ it->index = array_container_index_equalorlarger(ac, val);
+ *value_out = ac->array[it->index];
+ return true;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(c);
+ it->index = run_container_index_equalorlarger(rc, val);
+ if (rc->runs[it->index].value <= val) {
+ *value_out = val;
+ } else {
+ *value_out = rc->runs[it->index].value;
+ }
+ return true;
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return false;
+ }
+}
+
+bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint32_t high16, uint32_t *buf,
+ uint32_t count, uint32_t *consumed,
+ uint16_t *value_out) {
+ *consumed = 0;
+ if (count == 0) {
+ return false;
+ }
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ uint32_t wordindex = it->index / 64;
+ uint64_t word =
+ bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
+ do {
+ // Read set bits.
+ while (word != 0 && *consumed < count) {
+ *buf = high16 |
+ (wordindex * 64 + roaring_trailing_zeroes(word));
+ word = word & (word - 1);
+ buf++;
+ (*consumed)++;
+ }
+ // Skip unset bits.
+ while (word == 0 &&
+ wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ } while (word != 0 && *consumed < count);
+
+ if (word != 0) {
+ it->index = wordindex * 64 + roaring_trailing_zeroes(word);
+ *value_out = it->index;
+ return true;
+ }
+ return false;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ uint32_t num_values =
+ minimum_uint32(ac->cardinality - it->index, count);
+ uint32_t i; for(i = 0; i < num_values; i++) {
+ buf[i] = high16 | ac->array[it->index + i];
+ }
+ *consumed += num_values;
+ it->index += num_values;
+ if (it->index < ac->cardinality) {
+ *value_out = ac->array[it->index];
+ return true;
+ }
+ return false;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(c);
+ do {
+ uint32_t largest_run_value =
+ rc->runs[it->index].value + rc->runs[it->index].length;
+ uint32_t num_values = minimum_uint32(
+ largest_run_value - *value_out + 1, count - *consumed);
+ uint32_t i; for(i = 0; i < num_values; i++) {
+ buf[i] = high16 | (*value_out + i);
+ }
+ *value_out += num_values;
+ buf += num_values;
+ *consumed += num_values;
+
+ // We check for `*value_out == 0` because `*value_out += num_values`
+ // can overflow when the run ends at `UINT16_MAX` and the rest of the
+ // run is consumed. In this case `*value_out` wraps around to 0.
+ if (*value_out > largest_run_value || *value_out == 0) {
+ it->index++;
+ if (it->index < rc->n_runs) {
+ *value_out = rc->runs[it->index].value;
+ } else {
+ return false;
+ }
+ }
+ } while (*consumed < count);
+ return true;
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return 0;
+ }
+}
+
+bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
+ roaring_container_iterator_t *it,
+ uint64_t high48, uint64_t *buf,
+ uint32_t count, uint32_t *consumed,
+ uint16_t *value_out) {
+ *consumed = 0;
+ if (count == 0) {
+ return false;
+ }
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(c);
+ uint32_t wordindex = it->index / 64;
+ uint64_t word =
+ bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
+ do {
+ // Read set bits.
+ while (word != 0 && *consumed < count) {
+ *buf = high48 |
+ (wordindex * 64 + roaring_trailing_zeroes(word));
+ word = word & (word - 1);
+ buf++;
+ (*consumed)++;
+ }
+ // Skip unset bits.
+ while (word == 0 &&
+ wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ } while (word != 0 && *consumed < count);
+
+ if (word != 0) {
+ it->index = wordindex * 64 + roaring_trailing_zeroes(word);
+ *value_out = it->index;
+ return true;
+ }
+ return false;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(c);
+ uint32_t num_values =
+ minimum_uint32(ac->cardinality - it->index, count);
+ uint32_t i; for(i = 0; i < num_values; i++) {
+ buf[i] = high48 | ac->array[it->index + i];
+ }
+ *consumed += num_values;
+ it->index += num_values;
+ if (it->index < ac->cardinality) {
+ *value_out = ac->array[it->index];
+ return true;
+ }
+ return false;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(c);
+ do {
+ uint32_t largest_run_value =
+ rc->runs[it->index].value + rc->runs[it->index].length;
+ uint32_t num_values = minimum_uint32(
+ largest_run_value - *value_out + 1, count - *consumed);
+ uint32_t i; for(i = 0; i < num_values; i++) {
+ buf[i] = high48 | (*value_out + i);
+ }
+ *value_out += num_values;
+ buf += num_values;
+ *consumed += num_values;
+
+ // We check for `*value_out == 0` because `*value_out += num_values`
+ // can overflow when the run ends at `UINT16_MAX` and the rest of the
+ // run is consumed. In this case `*value_out` wraps around to 0.
+ if (*value_out > largest_run_value || *value_out == 0) {
+ it->index++;
+ if (it->index < rc->n_runs) {
+ *value_out = rc->runs[it->index].value;
+ } else {
+ return false;
+ }
+ }
+ } while (*consumed < count);
+ return true;
+ }
+ default:
+ assert(false);
+ roaring_unreachable;
+ return 0;
+ }
+}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
+
+#undef ROARING_INIT_ROARING_CONTAINER_ITERATOR_T
/* end file src/containers/containers.c */
/* begin file src/containers/convert.c */
#include <stdio.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
// file contains grubby stuff that must know impl. details of all container
// types.
bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
bitset_container_t *ans = bitset_container_create();
- int limit = array_container_cardinality(ac), i;
- for (i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
+ int limit = array_container_cardinality(ac);
+ int i; for(i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
return ans;
}
bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
int card = run_container_cardinality(arr);
bitset_container_t *answer = bitset_container_create();
- int rlepos;
- for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+ int rlepos; for(rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
rle16_t vl = arr->runs[rlepos];
bitset_set_lenrange(answer->words, vl.value, vl.length);
}
@@ -11820,12 +15325,11 @@ array_container_t *array_container_from_run(const run_container_t *arr) {
array_container_t *answer =
array_container_create_given_capacity(run_container_cardinality(arr));
answer->cardinality = 0;
- int rlepos;
- for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+ int rlepos; for(rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
int run_start = arr->runs[rlepos].value;
int run_end = run_start + arr->runs[rlepos].length;
- int run_value;
- for (run_value = run_start; run_value <= run_end; ++run_value) {
+
+ int run_value; for(run_value = run_start; run_value <= run_end; ++run_value) {
answer->array[answer->cardinality++] = (uint16_t)run_value;
}
}
@@ -11836,11 +15340,27 @@ array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
array_container_t *result =
array_container_create_given_capacity(bits->cardinality);
result->cardinality = bits->cardinality;
- // sse version ends up being slower here
- // (bitset_extract_setbits_sse_uint16)
- // because of the sparsity of the data
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
+ bitset_extract_setbits_avx512_uint16(
+ bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array,
+ bits->cardinality, 0);
+ } else
+#endif
+ {
+ // sse version ends up being slower here
+ // (bitset_extract_setbits_sse_uint16)
+ // because of the sparsity of the data
+ bitset_extract_setbits_uint16(
+ bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array, 0);
+ }
+#else
+ // If the system is not x64, then we have no accelerated function.
bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
result->array, 0);
+#endif
+
return result;
}
@@ -11855,10 +15375,10 @@ run_container_t *run_container_from_array(const array_container_t *c) {
int32_t n_runs = array_container_number_of_runs(c);
run_container_t *answer = run_container_create_given_capacity(n_runs);
int prev = -2;
- int run_start = -1, i;
+ int run_start = -1;
int32_t card = c->cardinality;
if (card == 0) return answer;
- for (i = 0; i < card; ++i) {
+ int i; for(i = 0; i < card; ++i) {
const uint16_t cur_val = c->array[i];
if (cur_val != prev + 1) {
// new run starts; flush old one, if any
@@ -11879,19 +15399,16 @@ run_container_t *run_container_from_array(const array_container_t *c) {
* Allocates and returns new container, which caller is responsible for freeing.
* It does not free the run container.
*/
-container_t *convert_to_bitset_or_array_container(
- run_container_t *rc, int32_t card,
- uint8_t *resulttype
-){
+container_t *convert_to_bitset_or_array_container(run_container_t *rc,
+ int32_t card,
+ uint8_t *resulttype) {
if (card <= DEFAULT_MAX_SIZE) {
array_container_t *answer = array_container_create_given_capacity(card);
- int rlepos;
answer->cardinality = 0;
- for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ int rlepos; for(rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
uint16_t run_start = rc->runs[rlepos].value;
uint16_t run_end = run_start + rc->runs[rlepos].length;
- uint16_t run_value;
- for (run_value = run_start; run_value < run_end;
+ uint16_t run_value; for(run_value = run_start; run_value < run_end;
++run_value) {
answer->array[answer->cardinality++] = run_value;
}
@@ -11899,18 +15416,17 @@ container_t *convert_to_bitset_or_array_container(
}
assert(card == answer->cardinality);
*resulttype = ARRAY_CONTAINER_TYPE;
- //run_container_free(r);
+ // run_container_free(r);
return answer;
}
bitset_container_t *answer = bitset_container_create();
- int rlepos;
- for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ int rlepos; for(rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
uint16_t run_start = rc->runs[rlepos].value;
bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
}
answer->cardinality = card;
*resulttype = BITSET_CONTAINER_TYPE;
- //run_container_free(r);
+ // run_container_free(r);
return answer;
}
@@ -11919,10 +15435,8 @@ container_t *convert_to_bitset_or_array_container(
/* If a conversion occurs, the caller is responsible for freeing the
* original container and also becomes responsible for freeing the new
* one. */
-container_t *convert_run_to_efficient_container(
- run_container_t *c,
- uint8_t *typecode_after
-){
+container_t *convert_run_to_efficient_container(run_container_t *c,
+ uint8_t *typecode_after) {
int32_t size_as_run_container =
run_container_serialized_size_in_bytes(c->n_runs);
@@ -11944,12 +15458,11 @@ container_t *convert_run_to_efficient_container(
// to array
array_container_t *answer = array_container_create_given_capacity(card);
answer->cardinality = 0;
- int rlepos;
- for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+ int rlepos; for(rlepos = 0; rlepos < c->n_runs; ++rlepos) {
int run_start = c->runs[rlepos].value;
int run_end = run_start + c->runs[rlepos].length;
- int run_value;
- for (run_value = run_start; run_value <= run_end; ++run_value) {
+
+ int run_value; for(run_value = run_start; run_value <= run_end; ++run_value) {
answer->array[answer->cardinality++] = (uint16_t)run_value;
}
}
@@ -11959,8 +15472,8 @@ container_t *convert_run_to_efficient_container(
// else to bitset
bitset_container_t *answer = bitset_container_create();
- int rlepos;
- for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+
+ int rlepos; for(rlepos = 0; rlepos < c->n_runs; ++rlepos) {
int start = c->runs[rlepos].value;
int end = start + c->runs[rlepos].length;
bitset_set_range(answer->words, start, end + 1);
@@ -11972,9 +15485,7 @@ container_t *convert_run_to_efficient_container(
// like convert_run_to_efficient_container but frees the old result if needed
container_t *convert_run_to_efficient_container_and_free(
- run_container_t *c,
- uint8_t *typecode_after
-){
+ run_container_t *c, uint8_t *typecode_after) {
container_t *answer = convert_run_to_efficient_container(c, typecode_after);
if (answer != c) run_container_free(c);
return answer;
@@ -11987,13 +15498,11 @@ container_t *convert_run_to_efficient_container_and_free(
// TODO: split into run- array- and bitset- subfunctions for sanity;
// a few function calls won't really matter.
-container_t *convert_run_optimize(
- container_t *c, uint8_t typecode_original,
- uint8_t *typecode_after
-){
+container_t *convert_run_optimize(container_t *c, uint8_t typecode_original,
+ uint8_t *typecode_after) {
if (typecode_original == RUN_CONTAINER_TYPE) {
- container_t *newc = convert_run_to_efficient_container(
- CAST_run(c), typecode_after);
+ container_t *newc =
+ convert_run_to_efficient_container(CAST_run(c), typecode_after);
if (newc != c) {
container_free(c, typecode_original);
}
@@ -12016,11 +15525,9 @@ container_t *convert_run_optimize(
run_container_t *answer = run_container_create_given_capacity(n_runs);
int prev = -2;
int run_start = -1;
- int i;
assert(card > 0);
-
- for (i = 0; i < card; ++i) {
+ int i; for(i = 0; i < card; ++i) {
uint16_t cur_val = c_qua_array->array[i];
if (cur_val != prev + 1) {
// new run starts; flush old one, if any
@@ -12068,7 +15575,7 @@ container_t *convert_run_optimize(
return answer;
}
- int local_run_start = __builtin_ctzll(cur_word);
+ int local_run_start = roaring_trailing_zeroes(cur_word);
int run_start = local_run_start + 64 * long_ctr;
uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
@@ -12084,7 +15591,7 @@ container_t *convert_run_optimize(
*typecode_after = RUN_CONTAINER_TYPE;
return answer;
}
- int local_run_end = __builtin_ctzll(~cur_word_with_1s);
+ int local_run_end = roaring_trailing_zeroes(~cur_word_with_1s);
run_end = local_run_end + long_ctr * 64;
add_run(answer, run_start, run_end - 1);
cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
@@ -12092,33 +15599,31 @@ container_t *convert_run_optimize(
return answer;
} else {
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
return NULL;
}
}
-container_t *container_from_run_range(
- const run_container_t *run,
- uint32_t min, uint32_t max, uint8_t *typecode_after
-){
+container_t *container_from_run_range(const run_container_t *run, uint32_t min,
+ uint32_t max, uint8_t *typecode_after) {
// We expect most of the time to end up with a bitset container
bitset_container_t *bitset = bitset_container_create();
*typecode_after = BITSET_CONTAINER_TYPE;
int32_t union_cardinality = 0;
- int32_t i;
- for (i = 0; i < run->n_runs; ++i) {
+ int32_t i; for(i = 0; i < run->n_runs; ++i) {
uint32_t rle_min = run->runs[i].value;
uint32_t rle_max = rle_min + run->runs[i].length;
bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min);
union_cardinality += run->runs[i].length + 1;
}
union_cardinality += max - min + 1;
- union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min);
+ union_cardinality -=
+ bitset_lenrange_cardinality(bitset->words, min, max - min);
bitset_set_lenrange(bitset->words, min, max - min);
bitset->cardinality = union_cardinality;
- if(bitset->cardinality <= DEFAULT_MAX_SIZE) {
+ if (bitset->cardinality <= DEFAULT_MAX_SIZE) {
// we need to convert to an array container
- array_container_t * array = array_container_from_bitset(bitset);
+ array_container_t *array = array_container_from_bitset(bitset);
*typecode_after = ARRAY_CONTAINER_TYPE;
bitset_container_free(bitset);
return array;
@@ -12127,7 +15632,9 @@ container_t *container_from_run_range(
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/convert.c */
/* begin file src/containers/mixed_andnot.c */
@@ -12141,7 +15648,9 @@ container_t *container_from_run_range(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the andnot of src_1 and src_2 and write the result to
@@ -12155,8 +15664,7 @@ void array_bitset_container_andnot(const array_container_t *src_1,
}
int32_t newcard = 0;
const int32_t origcard = src_1->cardinality;
- int i;
- for (i = 0; i < origcard; ++i) {
+ int i; for(i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
dst->array[newcard] = key;
newcard += 1 - bitset_container_contains(src_2, key);
@@ -12177,10 +15685,9 @@ void array_bitset_container_iandnot(array_container_t *src_1,
* Return true for a bitset result; false for array
*/
-bool bitset_array_container_andnot(
- const bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool bitset_array_container_andnot(const bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
// Java did this directly, but we have option of asm or avx
bitset_container_t *result = bitset_container_create();
bitset_container_copy(src_1, result);
@@ -12205,10 +15712,9 @@ bool bitset_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_array_container_iandnot(
- bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool bitset_array_container_iandnot(bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
*dst = src_1;
src_1->cardinality =
(int32_t)bitset_clear_list(src_1->words, (uint64_t)src_1->cardinality,
@@ -12229,21 +15735,18 @@ bool bitset_array_container_iandnot(
* result true) or an array container.
*/
-bool run_bitset_container_andnot(
- const run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool run_bitset_container_andnot(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
// follows the Java implementation as of June 2016
int card = run_container_cardinality(src_1);
if (card <= DEFAULT_MAX_SIZE) {
// must be an array
array_container_t *answer = array_container_create_given_capacity(card);
answer->cardinality = 0;
- int32_t rlepos;
- for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
- int run_value;
- for (run_value = rle.value; run_value <= rle.value + rle.length;
+ int run_value; for(run_value = rle.value; run_value <= rle.value + rle.length;
++run_value) {
if (!bitset_container_get(src_2, (uint16_t)run_value)) {
answer->array[answer->cardinality++] = (uint16_t)run_value;
@@ -12257,8 +15760,7 @@ bool run_bitset_container_andnot(
bitset_container_t *answer = bitset_container_clone(src_2);
uint32_t last_pos = 0;
- int32_t rlepos;
- for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
uint32_t start = rle.value;
@@ -12288,10 +15790,9 @@ bool run_bitset_container_andnot(
* result true) or an array container.
*/
-bool run_bitset_container_iandnot(
- run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool run_bitset_container_iandnot(run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
// dummy implementation
bool ans = run_bitset_container_andnot(src_1, src_2, dst);
run_container_free(src_1);
@@ -12305,16 +15806,14 @@ bool run_bitset_container_iandnot(
* result true) or an array container.
*/
-bool bitset_run_container_andnot(
- const bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+bool bitset_run_container_andnot(const bitset_container_t *src_1,
+ const run_container_t *src_2,
+ container_t **dst) {
// follows Java implementation
bitset_container_t *result = bitset_container_create();
- int32_t rlepos;
+
bitset_container_copy(src_1, result);
-
- for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
rle16_t rle = src_2->runs[rlepos];
bitset_reset_range(result->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -12337,13 +15836,12 @@ bool bitset_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_run_container_iandnot(
- bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+bool bitset_run_container_iandnot(bitset_container_t *src_1,
+ const run_container_t *src_2,
+ container_t **dst) {
*dst = src_1;
- int32_t rlepos; for (rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
rle16_t rle = src_2->runs[rlepos];
bitset_reset_range(src_1->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -12373,7 +15871,7 @@ static int run_array_array_subtract(const run_container_t *rc,
int32_t in_array_pos =
-1; // since advanceUntil always assumes we start the search AFTER this
- int rlepos; for (rlepos = 0; rlepos < rc->n_runs; rlepos++) {
+ int rlepos; for(rlepos = 0; rlepos < rc->n_runs; rlepos++) {
int32_t start = rc->runs[rlepos].value;
int32_t end = start + rc->runs[rlepos].length + 1;
@@ -12381,17 +15879,17 @@ static int run_array_array_subtract(const run_container_t *rc,
a_in->cardinality, (uint16_t)start);
if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
- int32_t i; for (i = start; i < end; ++i)
+ int32_t i; for(i = start; i < end; ++i)
a_out->array[out_card++] = (uint16_t)i;
} else {
uint16_t next_nonincluded = a_in->array[in_array_pos];
if (next_nonincluded >= end) {
// another case when run goes unaltered
- int32_t i; for (i = start; i < end; ++i)
+ int32_t i; for(i = start; i < end; ++i)
a_out->array[out_card++] = (uint16_t)i;
in_array_pos--; // ensure we see this item again if necessary
} else {
- int32_t i; for (i = start; i < end; ++i)
+ int32_t i; for(i = start; i < end; ++i)
if (i != next_nonincluded)
a_out->array[out_card++] = (uint16_t)i;
else // 0 should ensure we don't match
@@ -12410,10 +15908,9 @@ static int run_array_array_subtract(const run_container_t *rc,
* can become any type of container.
*/
-int run_array_container_andnot(
- const run_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+int run_array_container_andnot(const run_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
// follows the Java impl as of June 2016
int card = run_container_cardinality(src_1);
@@ -12493,8 +15990,7 @@ int run_array_container_andnot(
}
bitset_container_t *ans = bitset_container_from_run(src_1);
bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
- return (result_is_bitset ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE);
+ return (result_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
}
/* Compute the andnot of src_1 and src_2 and write the result to
@@ -12504,10 +16000,9 @@ int run_array_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-int run_array_container_iandnot(
- run_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+int run_array_container_iandnot(run_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
// dummy implementation same as June 2016 Java
int ans = run_array_container_andnot(src_1, src_2, dst);
run_container_free(src_1);
@@ -12536,7 +16031,7 @@ void array_run_container_andnot(const array_container_t *src_1,
uint16_t val = 0;
int dest_card = 0;
- int i; for (i = 0; i < src_1->cardinality; ++i) {
+ int i; for(i = 0; i < src_1->cardinality; ++i) {
val = src_1->array[i];
if (val < run_start)
dst->array[dest_card++] = val;
@@ -12571,10 +16066,8 @@ void array_run_container_iandnot(array_container_t *src_1,
* can become any kind of container.
*/
-int run_run_container_andnot(
- const run_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int run_run_container_andnot(const run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst) {
run_container_t *ans = run_container_create();
run_container_andnot(src_1, src_2, ans);
uint8_t typecode_after;
@@ -12589,10 +16082,8 @@ int run_run_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-int run_run_container_iandnot(
- run_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int run_run_container_iandnot(run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst) {
// following Java impl as of June 2016 (dummy)
int ans = run_run_container_andnot(src_1, src_2, dst);
run_container_free(src_1);
@@ -12621,10 +16112,9 @@ void array_array_container_iandnot(array_container_t *src_1,
* "dst is a bitset"
*/
-bool bitset_bitset_container_andnot(
- const bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bitset_container_t *ans = bitset_container_create();
int card = bitset_container_andnot(src_1, src_2, ans);
if (card <= DEFAULT_MAX_SIZE) {
@@ -12644,10 +16134,9 @@ bool bitset_bitset_container_andnot(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_bitset_container_iandnot(
- bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
int card = bitset_container_andnot(src_1, src_2, src_1);
if (card <= DEFAULT_MAX_SIZE) {
*dst = array_container_from_bitset(src_1);
@@ -12660,13 +16149,17 @@ bool bitset_bitset_container_iandnot(
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_andnot.c */
/* begin file src/containers/mixed_equal.c */
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
bool array_container_equal_bitset(const array_container_t* container1,
@@ -12677,11 +16170,11 @@ bool array_container_equal_bitset(const array_container_t* container1,
}
}
int32_t pos = 0;
- int32_t i; for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
+ int32_t i; for(i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
uint64_t w = container2->words[i];
while (w != 0) {
uint64_t t = w & (~w + 1);
- uint16_t r = i * 64 + __builtin_ctzll(w);
+ uint16_t r = i * 64 + roaring_trailing_zeroes(w);
if (pos >= container1->cardinality) {
return false;
}
@@ -12700,7 +16193,7 @@ bool run_container_equals_array(const run_container_t* container1,
if (run_container_cardinality(container1) != container2->cardinality)
return false;
int32_t pos = 0;
- int i; for (i = 0; i < container1->n_runs; ++i) {
+ int i; for(i = 0; i < container1->n_runs; ++i) {
const uint32_t run_start = container1->runs[i].value;
const uint32_t le = container1->runs[i].length;
@@ -12719,16 +16212,15 @@ bool run_container_equals_array(const run_container_t* container1,
bool run_container_equals_bitset(const run_container_t* container1,
const bitset_container_t* container2) {
-
int run_card = run_container_cardinality(container1);
- int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ?
- container2->cardinality :
- bitset_container_compute_cardinality(container2);
+ int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)
+ ? container2->cardinality
+ : bitset_container_compute_cardinality(container2);
if (bitset_card != run_card) {
return false;
}
- int32_t i; for (i = 0; i < container1->n_runs; i++) {
+ int32_t i; for(i = 0; i < container1->n_runs; i++) {
uint32_t begin = container1->runs[i].value;
if (container1->runs[i].length) {
uint32_t end = begin + container1->runs[i].length + 1;
@@ -12746,7 +16238,9 @@ bool run_container_equals_bitset(const run_container_t* container1,
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_equal.c */
/* begin file src/containers/mixed_intersection.c */
@@ -12757,7 +16251,9 @@ bool run_container_equals_bitset(const run_container_t* container1,
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the intersection of src_1 and src_2 and write the result to
@@ -12770,7 +16266,7 @@ void array_bitset_container_intersection(const array_container_t *src_1,
}
int32_t newcard = 0; // dst could be src_1
const int32_t origcard = src_1->cardinality;
- int i; for (i = 0; i < origcard; ++i) {
+ int i; for(i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
// this branchless approach is much faster...
dst->array[newcard] = key;
@@ -12796,22 +16292,21 @@ int array_bitset_container_intersection_cardinality(
const array_container_t *src_1, const bitset_container_t *src_2) {
int32_t newcard = 0;
const int32_t origcard = src_1->cardinality;
- int i; for (i = 0; i < origcard; ++i) {
+ int i; for(i = 0; i < origcard; ++i) {
uint16_t key = src_1->array[i];
newcard += bitset_container_contains(src_2, key);
}
return newcard;
}
-
bool array_bitset_container_intersect(const array_container_t *src_1,
- const bitset_container_t *src_2) {
- const int32_t origcard = src_1->cardinality;
- int i; for (i = 0; i < origcard; ++i) {
- uint16_t key = src_1->array[i];
- if(bitset_container_contains(src_2, key)) return true;
- }
- return false;
+ const bitset_container_t *src_2) {
+ const int32_t origcard = src_1->cardinality;
+ int i; for(i = 0; i < origcard; ++i) {
+ uint16_t key = src_1->array[i];
+ if (bitset_container_contains(src_2, key)) return true;
+ }
+ return false;
}
/* Compute the intersection of src_1 and src_2 and write the result to
@@ -12861,10 +16356,9 @@ void array_run_container_intersection(const array_container_t *src_1,
* *dst. If the result is true then the result is a bitset_container_t
* otherwise is a array_container_t. If *dst == src_2, an in-place processing
* is attempted.*/
-bool run_bitset_container_intersection(
- const run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool run_bitset_container_intersection(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
if (run_container_is_full(src_1)) {
if (*dst != src_2) *dst = bitset_container_clone(src_2);
return true;
@@ -12881,10 +16375,10 @@ bool run_bitset_container_intersection(
if (*dst == NULL) {
return false;
}
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
uint32_t endofrun = (uint32_t)rle.value + rle.length;
- uint32_t runValue; for (runValue = rle.value; runValue <= endofrun;
+ uint32_t runValue; for(runValue = rle.value; runValue <= endofrun;
++runValue) {
answer->array[answer->cardinality] = (uint16_t)runValue;
answer->cardinality +=
@@ -12896,7 +16390,7 @@ bool run_bitset_container_intersection(
if (*dst == src_2) { // we attempt in-place
bitset_container_t *answer = CAST_bitset(*dst);
uint32_t start = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
const rle16_t rle = src_1->runs[rlepos];
uint32_t end = rle.value;
bitset_reset_range(src_2->words, start, end);
@@ -12925,7 +16419,7 @@ bool run_bitset_container_intersection(
return true;
}
uint32_t start = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
const rle16_t rle = src_1->runs[rlepos];
uint32_t end = rle.value;
bitset_reset_range(answer->words, start, end);
@@ -12991,7 +16485,7 @@ int run_bitset_container_intersection_cardinality(
return bitset_container_cardinality(src_2);
}
int answer = 0;
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
answer +=
bitset_lenrange_cardinality(src_2->words, rle.value, rle.length);
@@ -12999,13 +16493,12 @@ int run_bitset_container_intersection_cardinality(
return answer;
}
-
bool array_run_container_intersect(const array_container_t *src_1,
- const run_container_t *src_2) {
- if( run_container_is_full(src_2) ) {
- return !array_container_empty(src_1);
- }
- if (src_2->n_runs == 0) {
+ const run_container_t *src_2) {
+ if (run_container_is_full(src_2)) {
+ return !array_container_empty(src_1);
+ }
+ if (src_2->n_runs == 0) {
return false;
}
int32_t rlepos = 0;
@@ -13034,15 +16527,16 @@ bool array_run_container_intersect(const array_container_t *src_1,
/* Compute the intersection between src_1 and src_2
**/
bool run_bitset_container_intersect(const run_container_t *src_1,
- const bitset_container_t *src_2) {
- if( run_container_is_full(src_1) ) {
- return !bitset_container_empty(src_2);
- }
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
- rle16_t rle = src_1->runs[rlepos];
- if(!bitset_lenrange_empty(src_2->words, rle.value,rle.length)) return true;
- }
- return false;
+ const bitset_container_t *src_2) {
+ if (run_container_is_full(src_1)) {
+ return !bitset_container_empty(src_2);
+ }
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+ if (!bitset_lenrange_empty(src_2->words, rle.value, rle.length))
+ return true;
+ }
+ return false;
}
/*
@@ -13050,10 +16544,9 @@ bool run_bitset_container_intersect(const run_container_t *src_1,
* to *dst. If the return function is true, the result is a bitset_container_t
* otherwise is a array_container_t.
*/
-bool bitset_bitset_container_intersection(
- const bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
const int newCardinality = bitset_container_and_justcard(src_1, src_2);
if (newCardinality > DEFAULT_MAX_SIZE) {
*dst = bitset_container_create();
@@ -13075,8 +16568,7 @@ bool bitset_bitset_container_intersection(
bool bitset_bitset_container_intersection_inplace(
bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+ container_t **dst) {
const int newCardinality = bitset_container_and_justcard(src_1, src_2);
if (newCardinality > DEFAULT_MAX_SIZE) {
*dst = src_1;
@@ -13095,7 +16587,9 @@ bool bitset_bitset_container_intersection_inplace(
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_intersection.c */
/* begin file src/containers/mixed_negation.c */
@@ -13109,7 +16603,9 @@ bool bitset_bitset_container_intersection_inplace(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
// TODO: make simplified and optimized negation code across
@@ -13142,9 +16638,8 @@ void array_container_negation(const array_container_t *src,
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-bool bitset_container_negation(
- const bitset_container_t *src, container_t **dst
-){
+bool bitset_container_negation(const bitset_container_t *src,
+ container_t **dst) {
return bitset_container_negation_range(src, 0, (1 << 16), dst);
}
@@ -13157,9 +16652,8 @@ bool bitset_container_negation(
* to free the container.
* In all cases, the result is in *dst.
*/
-bool bitset_container_negation_inplace(
- bitset_container_t *src, container_t **dst
-){
+bool bitset_container_negation_inplace(bitset_container_t *src,
+ container_t **dst) {
return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
@@ -13189,11 +16683,9 @@ int run_container_negation_inplace(run_container_t *src, container_t **dst) {
* to *dst. Returns true if the result is a bitset container
* and false for an array container. *dst is not preallocated.
*/
-bool array_container_negation_range(
- const array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+bool array_container_negation_range(const array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst) {
/* close port of the Java implementation */
if (range_start >= range_end) {
*dst = array_container_clone(src);
@@ -13228,9 +16720,9 @@ bool array_container_negation_range(
array_container_t *arr =
array_container_create_given_capacity(new_cardinality);
*dst = (container_t *)arr;
- if(new_cardinality == 0) {
- arr->cardinality = new_cardinality;
- return false; // we are done.
+ if (new_cardinality == 0) {
+ arr->cardinality = new_cardinality;
+ return false; // we are done.
}
// copy stuff before the active area
memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
@@ -13259,11 +16751,10 @@ bool array_container_negation_range(
* inplace version without inefficient copying.
*/
-bool array_container_negation_range_inplace(
- array_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+bool array_container_negation_range_inplace(array_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst) {
bool ans = array_container_negation_range(src, range_start, range_end, dst);
// TODO : try a real inplace version
array_container_free(src);
@@ -13277,11 +16768,9 @@ bool array_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-bool bitset_container_negation_range(
- const bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+bool bitset_container_negation_range(const bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst) {
// TODO maybe consider density-based estimate
// and sometimes build result directly as array, with
// conversion back to bitset if wrong. Or determine
@@ -13311,11 +16800,10 @@ bool bitset_container_negation_range(
* to free the container.
* In all cases, the result is in *dst.
*/
-bool bitset_container_negation_range_inplace(
- bitset_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+bool bitset_container_negation_range_inplace(bitset_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst) {
bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
src->cardinality = bitset_container_compute_cardinality(src);
if (src->cardinality > DEFAULT_MAX_SIZE) {
@@ -13333,11 +16821,9 @@ bool bitset_container_negation_range_inplace(
* We assume that dst is not pre-allocated. In
* case of failure, *dst will be NULL.
*/
-int run_container_negation_range(
- const run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+int run_container_negation_range(const run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst) {
uint8_t return_typecode;
// follows the Java implementation
@@ -13375,11 +16861,10 @@ int run_container_negation_range(
* then src is modified and no allocation is made.
* In all cases, the result is in *dst.
*/
-int run_container_negation_range_inplace(
- run_container_t *src,
- const int range_start, const int range_end,
- container_t **dst
-){
+int run_container_negation_range_inplace(run_container_t *src,
+ const int range_start,
+ const int range_end,
+ container_t **dst) {
uint8_t return_typecode;
if (range_end <= range_start) {
@@ -13452,13 +16937,17 @@ int run_container_negation_range_inplace(
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_negation.c */
/* begin file src/containers/mixed_subset.c */
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
bool array_container_is_subset_bitset(const array_container_t* container1,
@@ -13468,7 +16957,7 @@ bool array_container_is_subset_bitset(const array_container_t* container1,
return false;
}
}
- int i; for (i = 0; i < container1->cardinality; ++i) {
+ int i; for(i = 0; i < container1->cardinality; ++i) {
if (!bitset_container_contains(container2, container1->array[i])) {
return false;
}
@@ -13481,7 +16970,7 @@ bool run_container_is_subset_array(const run_container_t* container1,
if (run_container_cardinality(container1) > container2->cardinality)
return false;
int32_t start_pos = -1, stop_pos = -1;
- int i; for (i = 0; i < container1->n_runs; ++i) {
+ int i; for(i = 0; i < container1->n_runs; ++i) {
int32_t start = container1->runs[i].value;
int32_t stop = start + container1->runs[i].length;
start_pos = advanceUntil(container2->array, stop_pos,
@@ -13536,10 +17025,10 @@ bool run_container_is_subset_bitset(const run_container_t* container1,
return false;
}
}
- int i; for (i = 0; i < container1->n_runs; ++i) {
+ int i; for(i = 0; i < container1->n_runs; ++i) {
uint32_t run_start = container1->runs[i].value;
uint32_t le = container1->runs[i].length;
- uint32_t j;for ( j = run_start; j <= run_start + le; ++j) {
+ uint32_t j; for(j = run_start; j <= run_start + le; ++j) {
if (!bitset_container_contains(container2, j)) {
return false;
}
@@ -13564,7 +17053,7 @@ bool bitset_container_is_subset_run(const bitset_container_t* container1,
uint32_t start = container2->runs[i_run].value;
uint32_t stop = start + container2->runs[i_run].length;
uint64_t t = w & (~w + 1);
- uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
+ uint16_t r = i_bitset * 64 + roaring_trailing_zeroes(w);
if (r < start) {
return false;
} else if (r > stop) {
@@ -13593,7 +17082,9 @@ bool bitset_container_is_subset_run(const bitset_container_t* container1,
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_subset.c */
/* begin file src/containers/mixed_union.c */
@@ -13607,7 +17098,9 @@ bool bitset_container_is_subset_run(const bitset_container_t* container1,
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the union of src_1 and src_2 and write the result to
@@ -13636,7 +17129,7 @@ void run_bitset_container_union(const run_container_t *src_1,
bitset_container_t *dst) {
assert(!run_container_is_full(src_1)); // catch this case upstream
if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_set_lenrange(dst->words, rle.value, rle.length);
}
@@ -13648,7 +17141,7 @@ void run_bitset_container_lazy_union(const run_container_t *src_1,
bitset_container_t *dst) {
assert(!run_container_is_full(src_1)); // catch this case upstream
if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_set_lenrange(dst->words, rle.value, rle.length);
}
@@ -13752,17 +17245,16 @@ void array_run_container_inplace_union(const array_container_t *src_1,
}
}
-bool array_array_container_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_union(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality = src_1->cardinality + src_2->cardinality;
if (totalCardinality <= DEFAULT_MAX_SIZE) {
*dst = array_container_create_given_capacity(totalCardinality);
if (*dst != NULL) {
array_container_union(src_1, src_2, CAST_array(*dst));
} else {
- return true; // otherwise failure won't be caught
+ return true; // otherwise failure won't be caught
}
return false; // not a bitset
}
@@ -13784,26 +17276,32 @@ bool array_array_container_union(
return returnval;
}
-bool array_array_container_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_inplace_union(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality = src_1->cardinality + src_2->cardinality;
*dst = NULL;
if (totalCardinality <= DEFAULT_MAX_SIZE) {
- if(src_1->capacity < totalCardinality) {
- *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
+ if (src_1->capacity < totalCardinality) {
+ *dst = array_container_create_given_capacity(
+ 2 * totalCardinality); // be purposefully generous
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
} else {
- memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
- src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
- src_2->array, src_2->cardinality, src_1->array);
- return false; // not a bitset
+ memmove(src_1->array + src_2->cardinality, src_1->array,
+ src_1->cardinality * sizeof(uint16_t));
+ // In theory, we could use fast_union_uint16, but it is unsafe. It
+ // fails with Intel compilers in particular.
+ // https://github.com/RoaringBitmap/CRoaring/pull/452
+ // See report https://github.com/RoaringBitmap/CRoaring/issues/476
+ src_1->cardinality = (int32_t)union_uint16(
+ src_1->array + src_2->cardinality, src_1->cardinality,
+ src_2->array, src_2->cardinality, src_1->array);
+ return false; // not a bitset
}
}
*dst = bitset_container_create();
@@ -13816,13 +17314,14 @@ bool array_array_container_inplace_union(
src_2->cardinality);
if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
// need to convert!
- if(src_1->capacity < ourbitset->cardinality) {
- array_container_grow(src_1, ourbitset->cardinality, false);
+ if (src_1->capacity < ourbitset->cardinality) {
+ array_container_grow(src_1, ourbitset->cardinality, false);
}
- bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS,
- src_1->array, 0);
- src_1->cardinality = ourbitset->cardinality;
+ bitset_extract_setbits_uint16(ourbitset->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS,
+ src_1->array, 0);
+ src_1->cardinality = ourbitset->cardinality;
*dst = src_1;
bitset_container_free(ourbitset);
returnval = false; // not going to be a bitset
@@ -13831,18 +17330,28 @@ bool array_array_container_inplace_union(
return returnval;
}
-
-bool array_array_container_lazy_union(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_lazy_union(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality = src_1->cardinality + src_2->cardinality;
+ //
+ // We assume that operations involving bitset containers will be faster than
+ // operations involving solely array containers, except maybe when array
+ // containers are small. Indeed, for example, it is cheap to compute the
+ // union between an array and a bitset container, generally more so than
+ // between a large array and another array. So it is advantageous to favour
+ // bitset containers during the computation. Of course, if we convert array
+ // containers eagerly to bitset containers, we may later need to revert the
+ // bitset containers to array containers to satisfy the Roaring format
+ // requirements, but such one-time conversions at the end may not be overly
+ // expensive. We arrived at this design based on extensive benchmarking.
+ //
if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
*dst = array_container_create_given_capacity(totalCardinality);
if (*dst != NULL) {
array_container_union(src_1, src_2, CAST_array(*dst));
} else {
- return true; // otherwise failure won't be caught
+ return true; // otherwise failure won't be caught
}
return false; // not a bitset
}
@@ -13857,27 +17366,78 @@ bool array_array_container_lazy_union(
return returnval;
}
-
-bool array_array_container_lazy_inplace_union(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_lazy_inplace_union(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality = src_1->cardinality + src_2->cardinality;
*dst = NULL;
+ //
+ // We assume that operations involving bitset containers will be faster than
+ // operations involving solely array containers, except maybe when array
+ // containers are small. Indeed, for example, it is cheap to compute the
+ // union between an array and a bitset container, generally more so than
+ // between a large array and another array. So it is advantageous to favour
+ // bitset containers during the computation. Of course, if we convert array
+ // containers eagerly to bitset containers, we may later need to revert the
+ // bitset containers to array containers to satisfy the Roaring format
+ // requirements, but such one-time conversions at the end may not be overly
+ // expensive. We arrived at this design based on extensive benchmarking.
+ //
if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
- if(src_1->capacity < totalCardinality) {
- *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
- if (*dst != NULL) {
- array_container_union(src_1, src_2, CAST_array(*dst));
- } else {
- return true; // otherwise failure won't be caught
- }
- return false; // not a bitset
+ if (src_1->capacity < totalCardinality) {
+ *dst = array_container_create_given_capacity(
+ 2 * totalCardinality); // be purposefully generous
+ if (*dst != NULL) {
+ array_container_union(src_1, src_2, CAST_array(*dst));
+ } else {
+ return true; // otherwise failure won't be caught
+ }
+ return false; // not a bitset
} else {
- memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
- src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
- src_2->array, src_2->cardinality, src_1->array);
- return false; // not a bitset
+ memmove(src_1->array + src_2->cardinality, src_1->array,
+ src_1->cardinality * sizeof(uint16_t));
+ /*
+ Next line is safe:
+
+ We just need to focus on the reading and writing performed on
+ array1. In `union_vector16`, both vectorized and scalar code still
+ obey the basic rule: read from two inputs, do the union, and then
+ write the output.
+
+ Let's say the length(cardinality) of input2 is L2:
+ ```
+ |<- L2 ->|
+ array1: [output--- |input 1---|---]
+ array2: [input 2---]
+ ```
+ Let's define 3 __m128i pointers, `pos1` starts from `input1`,
+ `pos2` starts from `input2`, these 2 point at the next byte to
+ read, `out` starts from `output`, pointing at the next byte to
+ overwrite.
+ ```
+ array1: [output--- |input 1---|---]
+ ^ ^
+ out pos1
+ array2: [input 2---]
+ ^
+ pos2
+ ```
+ The union output never contains more elements than the two inputs
+ combined, so we have:
+ ```
+ out <= pos1 + pos2
+ ```
+ therefore:
+ ```
+ out <= pos1 + L2
+ ```
+ which means you will not overwrite data beyond pos1, so the data that
+ has not yet been read is safe, and we don't care about the data already read.
+ */
+ src_1->cardinality = (int32_t)fast_union_uint16(
+ src_1->array + src_2->cardinality, src_1->cardinality,
+ src_2->array, src_2->cardinality, src_1->array);
+ return false; // not a bitset
}
}
*dst = bitset_container_create();
@@ -13892,7 +17452,9 @@ bool array_array_container_lazy_inplace_union(
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_union.c */
/* begin file src/containers/mixed_xor.c */
@@ -13905,16 +17467,17 @@ bool array_array_container_lazy_inplace_union(
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
/* Compute the xor of src_1 and src_2 and write the result to
* dst (which has no container initially).
* Result is true iff dst is a bitset */
-bool array_bitset_container_xor(
- const array_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool array_bitset_container_xor(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bitset_container_t *result = bitset_container_create();
bitset_container_copy(src_2, result);
result->cardinality = (int32_t)bitset_flip_list_withcard(
@@ -13950,14 +17513,13 @@ void array_bitset_container_lazy_xor(const array_container_t *src_1,
* result true) or an array container.
*/
-bool run_bitset_container_xor(
- const run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool run_bitset_container_xor(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bitset_container_t *result = bitset_container_create();
bitset_container_copy(src_2, result);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_flip_range(result->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -13982,7 +17544,7 @@ void run_bitset_container_lazy_xor(const run_container_t *src_1,
const bitset_container_t *src_2,
bitset_container_t *dst) {
if (src_2 != dst) bitset_container_copy(src_2, dst);
- int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ int32_t rlepos; for(rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
rle16_t rle = src_1->runs[rlepos];
bitset_flip_range(dst->words, rle.value,
rle.value + rle.length + UINT32_C(1));
@@ -13994,10 +17556,8 @@ void run_bitset_container_lazy_xor(const run_container_t *src_1,
* can become any kind of container.
*/
-int array_run_container_xor(
- const array_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int array_run_container_xor(const array_container_t *src_1,
+ const run_container_t *src_2, container_t **dst) {
// semi following Java XOR implementation as of May 2016
// the C OR implementation works quite differently and can return a run
// container
@@ -14021,15 +17581,13 @@ int array_run_container_xor(
array_container_t *temp = array_container_from_run(src_2);
bool ret_is_bitset = array_array_container_xor(temp, src_1, dst);
array_container_free(temp);
- return ret_is_bitset ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE;
+ return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE;
} else { // guess that it will end up as a bitset
bitset_container_t *result = bitset_container_from_run(src_2);
bool is_bitset = bitset_array_container_ixor(result, src_1, dst);
// any necessary type conversion has been done by the ixor
- int retval = (is_bitset ? BITSET_CONTAINER_TYPE
- : ARRAY_CONTAINER_TYPE);
+ int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
return retval;
}
}
@@ -14073,10 +17631,8 @@ void array_run_container_lazy_xor(const array_container_t *src_1,
* can become any kind of container.
*/
-int run_run_container_xor(
- const run_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int run_run_container_xor(const run_container_t *src_1,
+ const run_container_t *src_2, container_t **dst) {
run_container_t *ans = run_container_create();
run_container_xor(src_1, src_2, ans);
uint8_t typecode_after;
@@ -14092,10 +17648,9 @@ int run_run_container_xor(
*
*/
-bool array_array_container_xor(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_xor(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality =
src_1->cardinality + src_2->cardinality; // upper bound
if (totalCardinality <= DEFAULT_MAX_SIZE) {
@@ -14118,16 +17673,28 @@ bool array_array_container_xor(
return returnval;
}
-bool array_array_container_lazy_xor(
- const array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_lazy_xor(const array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int totalCardinality = src_1->cardinality + src_2->cardinality;
- // upper bound, but probably poor estimate for xor
+ //
+ // We assume that operations involving bitset containers will be faster than
+ // operations involving solely array containers, except maybe when array
+ // containers are small. Indeed, for example, it is cheap to compute the
+ // exclusive union between an array and a bitset container, generally more
+ // so than between a large array and another array. So it is advantageous to
+ // favour bitset containers during the computation. Of course, if we convert
+ // array containers eagerly to bitset containers, we may later need to
+ // revert the bitset containers to array containers to satisfy the Roaring
+ // format requirements, but such one-time conversions at the end may not be
+ // overly expensive. We arrived at this design based on extensive
+ // benchmarking on unions. For XOR/exclusive union, we simply followed the
+ // heuristic used by the unions (see mixed_union.c). Further tuning is
+ // possible.
+ //
if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
*dst = array_container_create_given_capacity(totalCardinality);
- if (*dst != NULL)
- array_container_xor(src_1, src_2, CAST_array(*dst));
+ if (*dst != NULL) array_container_xor(src_1, src_2, CAST_array(*dst));
return false; // not a bitset
}
*dst = bitset_container_from_array(src_1);
@@ -14145,10 +17712,9 @@ bool array_array_container_lazy_xor(
* "dst is a bitset"
*/
-bool bitset_bitset_container_xor(
- const bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool bitset_bitset_container_xor(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bitset_container_t *ans = bitset_container_create();
int card = bitset_container_xor(src_1, src_2, ans);
if (card <= DEFAULT_MAX_SIZE) {
@@ -14168,10 +17734,9 @@ bool bitset_bitset_container_xor(
* cases, the caller is responsible for deallocating dst.
* Returns true iff dst is a bitset */
-bool bitset_array_container_ixor(
- bitset_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool bitset_array_container_ixor(bitset_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
*dst = src_1;
src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
src_1->words, src_1->cardinality, src_2->array, src_2->cardinality);
@@ -14189,10 +17754,9 @@ bool bitset_array_container_ixor(
* Anything inplace with a bitset is a good candidate
*/
-bool bitset_bitset_container_ixor(
- bitset_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool bitset_bitset_container_ixor(bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
int card = bitset_container_xor(src_1, src_2, src_1);
if (card <= DEFAULT_MAX_SIZE) {
*dst = array_container_from_bitset(src_1);
@@ -14204,10 +17768,9 @@ bool bitset_bitset_container_ixor(
}
}
-bool array_bitset_container_ixor(
- array_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool array_bitset_container_ixor(array_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bool ans = array_bitset_container_xor(src_1, src_2, dst);
array_container_free(src_1);
return ans;
@@ -14220,19 +17783,17 @@ bool array_bitset_container_ixor(
* result true) or an array container.
*/
-bool run_bitset_container_ixor(
- run_container_t *src_1, const bitset_container_t *src_2,
- container_t **dst
-){
+bool run_bitset_container_ixor(run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst) {
bool ans = run_bitset_container_xor(src_1, src_2, dst);
run_container_free(src_1);
return ans;
}
-bool bitset_run_container_ixor(
- bitset_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+bool bitset_run_container_ixor(bitset_container_t *src_1,
+ const run_container_t *src_2,
+ container_t **dst) {
bool ans = run_bitset_container_xor(src_2, src_1, dst);
bitset_container_free(src_1);
return ans;
@@ -14242,44 +17803,40 @@ bool bitset_run_container_ixor(
* can become any kind of container.
*/
-int array_run_container_ixor(
- array_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int array_run_container_ixor(array_container_t *src_1,
+ const run_container_t *src_2, container_t **dst) {
int ans = array_run_container_xor(src_1, src_2, dst);
array_container_free(src_1);
return ans;
}
-int run_array_container_ixor(
- run_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+int run_array_container_ixor(run_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
int ans = array_run_container_xor(src_2, src_1, dst);
run_container_free(src_1);
return ans;
}
-bool array_array_container_ixor(
- array_container_t *src_1, const array_container_t *src_2,
- container_t **dst
-){
+bool array_array_container_ixor(array_container_t *src_1,
+ const array_container_t *src_2,
+ container_t **dst) {
bool ans = array_array_container_xor(src_1, src_2, dst);
array_container_free(src_1);
return ans;
}
-int run_run_container_ixor(
- run_container_t *src_1, const run_container_t *src_2,
- container_t **dst
-){
+int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst) {
int ans = run_run_container_xor(src_1, src_2, dst);
run_container_free(src_1);
return ans;
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_xor.c */
/* begin file src/containers/run.c */
@@ -14287,8 +17844,20 @@ int run_run_container_ixor(
#include <stdlib.h>
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
extern inline uint16_t run_container_minimum(const run_container_t *run);
@@ -14297,15 +17866,15 @@ extern inline int32_t interleavedBinarySearch(const rle16_t *array,
int32_t lenarray, uint16_t ikey);
extern inline bool run_container_contains(const run_container_t *run,
uint16_t pos);
-extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
+extern inline int run_container_index_equalorlarger(const run_container_t *arr,
+ uint16_t x);
extern inline bool run_container_is_full(const run_container_t *run);
extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
extern inline run_container_t *run_container_create_range(uint32_t start,
- uint32_t stop);
+ uint32_t stop);
extern inline int run_container_cardinality(const run_container_t *run);
-
bool run_container_add(run_container_t *run, uint16_t pos) {
int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
if (index >= 0) return false; // already there
@@ -14359,12 +17928,14 @@ bool run_container_add(run_container_t *run, uint16_t pos) {
run_container_t *run_container_create_given_capacity(int32_t size) {
run_container_t *run;
/* Allocate the run container itself. */
- if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == NULL) {
+ if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) ==
+ NULL) {
return NULL;
}
- if (size <= 0 ) { // we don't want to rely on malloc(0)
+ if (size <= 0) { // we don't want to rely on malloc(0)
run->runs = NULL;
- } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * size)) == NULL) {
+ } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) *
+ size)) == NULL) {
roaring_free(run);
return NULL;
}
@@ -14378,7 +17949,8 @@ int run_container_shrink_to_fit(run_container_t *src) {
int savings = src->capacity - src->n_runs;
src->capacity = src->n_runs;
rle16_t *oldruns = src->runs;
- src->runs = (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t));
+ src->runs =
+ (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t));
if (src->runs == NULL) roaring_free(oldruns); // should never happen?
return savings;
}
@@ -14387,6 +17959,7 @@ run_container_t *run_container_create(void) {
return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
}
+ALLOW_UNALIGNED
run_container_t *run_container_clone(const run_container_t *src) {
run_container_t *run = run_container_create_given_capacity(src->capacity);
if (run == NULL) return NULL;
@@ -14396,9 +17969,8 @@ run_container_t *run_container_clone(const run_container_t *src) {
return run;
}
-void run_container_offset(const run_container_t *c,
- container_t **loc, container_t **hic,
- uint16_t offset) {
+void run_container_offset(const run_container_t *c, container_t **loc,
+ container_t **hic, uint16_t offset) {
run_container_t *lo = NULL, *hi = NULL;
bool split;
@@ -14413,36 +17985,37 @@ void run_container_offset(const run_container_t *c,
lo_cap = c->n_runs;
hi_cap = 0;
} else {
- split = c->runs[pivot].value <= top;
+ split = c->runs[pivot].value < top;
lo_cap = pivot + (split ? 1 : 0);
hi_cap = c->n_runs - pivot;
}
if (loc && lo_cap) {
lo = run_container_create_given_capacity(lo_cap);
- memcpy(lo->runs, c->runs, lo_cap*sizeof(rle16_t));
+ memcpy(lo->runs, c->runs, lo_cap * sizeof(rle16_t));
lo->n_runs = lo_cap;
- int i; for (i = 0; i < lo_cap; ++i) {
+ int i; for(i = 0; i < lo_cap; ++i) {
lo->runs[i].value += offset;
}
- *loc = (container_t*)lo;
+ *loc = (container_t *)lo;
}
if (hic && hi_cap) {
hi = run_container_create_given_capacity(hi_cap);
- memcpy(hi->runs, c->runs+pivot, hi_cap*sizeof(rle16_t));
+ memcpy(hi->runs, c->runs + pivot, hi_cap * sizeof(rle16_t));
hi->n_runs = hi_cap;
- int i; for (i = 0; i < hi_cap; ++i) {
+ int i; for(i = 0; i < hi_cap; ++i) {
hi->runs[i].value += offset;
}
- *hic = (container_t*)hi;
+ *hic = (container_t *)hi;
}
// Fix the split.
if (split) {
if (lo != NULL) {
// Add the missing run to 'lo', exhausting length.
- lo->runs[lo->n_runs-1].length = (1 << 16) - lo->runs[lo->n_runs-1].value - 1;
+ lo->runs[lo->n_runs - 1].length =
+ (1 << 16) - lo->runs[lo->n_runs - 1].value - 1;
}
if (hi != NULL) {
@@ -14455,40 +18028,35 @@ void run_container_offset(const run_container_t *c,
/* Free memory. */
void run_container_free(run_container_t *run) {
- if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
- roaring_free(run->runs);
- run->runs = NULL; // pedantic
+ if (run->runs !=
+ NULL) { // Jon Strabala reports that some tools complain otherwise
+ roaring_free(run->runs);
+ run->runs = NULL; // pedantic
}
roaring_free(run);
}
void run_container_grow(run_container_t *run, int32_t min, bool copy) {
- int32_t newCapacity =
- (run->capacity == 0)
- ? RUN_DEFAULT_INIT_SIZE
- : run->capacity < 64 ? run->capacity * 2
- : run->capacity < 1024 ? run->capacity * 3 / 2
- : run->capacity * 5 / 4;
+ int32_t newCapacity = (run->capacity == 0) ? RUN_DEFAULT_INIT_SIZE
+ : run->capacity < 64 ? run->capacity * 2
+ : run->capacity < 1024 ? run->capacity * 3 / 2
+ : run->capacity * 5 / 4;
if (newCapacity < min) newCapacity = min;
run->capacity = newCapacity;
assert(run->capacity >= min);
if (copy) {
rle16_t *oldruns = run->runs;
- run->runs =
- (rle16_t *)roaring_realloc(oldruns, run->capacity * sizeof(rle16_t));
+ run->runs = (rle16_t *)roaring_realloc(oldruns,
+ run->capacity * sizeof(rle16_t));
if (run->runs == NULL) roaring_free(oldruns);
} else {
// Jon Strabala reports that some tools complain otherwise
if (run->runs != NULL) {
- roaring_free(run->runs);
+ roaring_free(run->runs);
}
run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
}
- // handle the case where realloc fails
- if (run->runs == NULL) {
- fprintf(stderr, "could not allocate memory\n");
- }
- assert(run->runs != NULL);
+ // We may have run->runs == NULL.
}
/* copy one container into another */
@@ -14808,7 +18376,7 @@ int run_container_intersection_cardinality(const run_container_t *src_1,
}
bool run_container_intersect(const run_container_t *src_1,
- const run_container_t *src_2) {
+ const run_container_t *src_2) {
const bool if1 = run_container_is_full(src_1);
const bool if2 = run_container_is_full(src_2);
if (if1 || if2) {
@@ -14816,7 +18384,7 @@ bool run_container_intersect(const run_container_t *src_1,
return !run_container_empty(src_2);
}
if (if2) {
- return !run_container_empty(src_1);
+ return !run_container_empty(src_1);
}
}
int32_t rlepos = 0;
@@ -14845,7 +18413,6 @@ bool run_container_intersect(const run_container_t *src_1,
return false;
}
-
/* Compute the difference of src_1 and src_2 and write the result to
* dst. It is assumed that dst is distinct from both src_1 and src_2. */
void run_container_andnot(const run_container_t *src_1,
@@ -14912,10 +18479,10 @@ int run_container_to_uint32_array(void *vout, const run_container_t *cont,
uint32_t base) {
int outpos = 0;
uint32_t *out = (uint32_t *)vout;
- int i; for (i = 0; i < cont->n_runs; ++i) {
+ int i; for(i = 0; i < cont->n_runs; ++i) {
uint32_t run_start = base + cont->runs[i].value;
uint16_t le = cont->runs[i].length;
- int j;for (j = 0; j <= le; ++j) {
+ int j; for(j = 0; j <= le; ++j) {
uint32_t val = run_start + j;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
@@ -14925,18 +18492,20 @@ int run_container_to_uint32_array(void *vout, const run_container_t *cont,
return outpos;
}
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/*
* Print this container using printf (useful for debugging).
*/
void run_container_printf(const run_container_t *cont) {
- int i; for (i = 0; i < cont->n_runs; ++i) {
+
+ int i; for(i = 0; i < cont->n_runs; ++i) {
uint16_t run_start = cont->runs[i].value;
uint16_t le = cont->runs[i].length;
printf("[%d,%d]", run_start, run_start + le);
}
}
+
/*
* Print this container using printf as a comma-separated list of 32-bit
* integers starting at base.
@@ -14948,15 +18517,67 @@ void run_container_printf_as_uint32_array(const run_container_t *cont,
uint32_t run_start = base + cont->runs[0].value;
uint16_t le = cont->runs[0].length;
printf("%u", run_start);
- uint32_t j;for ( j = 1; j <= le; ++j) printf(",%u", run_start + j);
+ uint32_t j; for(j = 1; j <= le; ++j) printf(",%u", run_start + j);
}
- int32_t i; for (i = 1; i < cont->n_runs; ++i) {
+ int32_t i; for(i = 1; i < cont->n_runs; ++i) {
uint32_t run_start = base + cont->runs[i].value;
uint16_t le = cont->runs[i].length;
- uint32_t j;for ( j = 0; j <= le; ++j) printf(",%u", run_start + j);
+ uint32_t j; for(j = 0; j <= le; ++j) printf(",%u", run_start + j);
}
}
#endif
+
+/*
+ * Validate the container. Returns true if valid.
+ */
+bool run_container_validate(const run_container_t *run, const char **reason) {
+ if (run->n_runs < 0) {
+ *reason = "negative run count";
+ return false;
+ }
+ if (run->capacity < 0) {
+ *reason = "negative run capacity";
+ return false;
+ }
+ if (run->capacity < run->n_runs) {
+ *reason = "capacity less than run count";
+ return false;
+ }
+
+ if (run->n_runs == 0) {
+ *reason = "zero run count";
+ return false;
+ }
+ if (run->runs == NULL) {
+ *reason = "NULL runs";
+ return false;
+ }
+
+ // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX.
+ uint32_t last_end = 0;
+ int i; for(i = 0; i < run->n_runs; ++i) {
+ uint32_t start = run->runs[i].value;
+ uint32_t end = start + run->runs[i].length + 1;
+ if (end <= start) {
+ *reason = "run start + length overflow";
+ return false;
+ }
+ if (end > (1 << 16)) {
+ *reason = "run start + length too large";
+ return false;
+ }
+ if (start < last_end) {
+ *reason = "run start less than last end";
+ return false;
+ }
+ if (start == last_end && last_end != 0) {
+ *reason = "run start equal to last end, should have combined";
+ return false;
+ }
+ last_end = end;
+ }
+ return true;
+}
int32_t run_container_write(const run_container_t *container, char *buf) {
uint16_t cast_16 = container->n_runs;
@@ -14974,20 +18595,20 @@ int32_t run_container_read(int32_t cardinality, run_container_t *container,
container->n_runs = cast_16;
if (container->n_runs > container->capacity)
run_container_grow(container, container->n_runs, false);
- if(container->n_runs > 0) {
- memcpy(container->runs, buf + sizeof(uint16_t),
- container->n_runs * sizeof(rle16_t));
+ if (container->n_runs > 0) {
+ memcpy(container->runs, buf + sizeof(uint16_t),
+ container->n_runs * sizeof(rle16_t));
}
return run_container_size_in_bytes(container);
}
bool run_container_iterate(const run_container_t *cont, uint32_t base,
roaring_iterator iterator, void *ptr) {
- int i; for (i = 0; i < cont->n_runs; ++i) {
+ int i; for(i = 0; i < cont->n_runs; ++i) {
uint32_t run_start = base + cont->runs[i].value;
uint16_t le = cont->runs[i].length;
- int j;for (j = 0; j <= le; ++j)
+ int j; for(j = 0; j <= le; ++j)
if (!iterator(run_start + j, ptr)) return false;
}
return true;
@@ -14996,11 +18617,11 @@ bool run_container_iterate(const run_container_t *cont, uint32_t base,
bool run_container_iterate64(const run_container_t *cont, uint32_t base,
roaring_iterator64 iterator, uint64_t high_bits,
void *ptr) {
- int i; for (i = 0; i < cont->n_runs; ++i) {
+ int i; for(i = 0; i < cont->n_runs; ++i) {
uint32_t run_start = base + cont->runs[i].value;
uint16_t le = cont->runs[i].length;
- int j;for (j = 0; j <= le; ++j)
+ int j; for(j = 0; j <= le; ++j)
if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
return false;
}
@@ -15086,7 +18707,7 @@ void run_container_smart_append_exclusive(run_container_t *src,
bool run_container_select(const run_container_t *container,
uint32_t *start_rank, uint32_t rank,
uint32_t *element) {
- int i; for (i = 0; i < container->n_runs; i++) {
+ int i; for(i = 0; i < container->n_runs; i++) {
uint16_t length = container->runs[i].length;
if (rank <= *start_rank + length) {
uint16_t value = container->runs[i].value;
@@ -15101,7 +18722,7 @@ bool run_container_select(const run_container_t *container,
int run_container_rank(const run_container_t *container, uint16_t x) {
int sum = 0;
uint32_t x32 = x;
- int i; for (i = 0; i < container->n_runs; i++) {
+ int i; for(i = 0; i < container->n_runs; i++) {
uint32_t startpoint = container->runs[i].value;
uint32_t length = container->runs[i].length;
uint32_t endpoint = length + startpoint;
@@ -15114,8 +18735,105 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
}
return sum;
}
+uint32_t run_container_rank_many(const run_container_t *container,
+ uint64_t start_rank, const uint32_t *begin,
+ const uint32_t *end, uint64_t *ans) {
+ const uint16_t high = (uint16_t)((*begin) >> 16);
+ const uint32_t *iter = begin;
+ int sum = 0;
+ int i = 0;
+ for (; iter != end; iter++) {
+ uint32_t x = *iter;
+ uint16_t xhigh = (uint16_t)(x >> 16);
+ if (xhigh != high) return iter - begin; // stop at next container
+
+ uint32_t x32 = x & 0xFFFF;
+ while (i < container->n_runs) {
+ uint32_t startpoint = container->runs[i].value;
+ uint32_t length = container->runs[i].length;
+ uint32_t endpoint = length + startpoint;
+ if (x32 <= endpoint) {
+ if (x32 < startpoint) {
+ *(ans++) = start_rank + sum;
+ } else {
+ *(ans++) = start_rank + sum + (x32 - startpoint) + 1;
+ }
+ break;
+ } else {
+ sum += length + 1;
+ i++;
+ }
+ }
+ if (i >= container->n_runs) *(ans++) = start_rank + sum;
+ }
+
+ return iter - begin;
+}
+
+int run_container_get_index(const run_container_t *container, uint16_t x) {
+ if (run_container_contains(container, x)) {
+ int sum = 0;
+ uint32_t x32 = x;
+ int i; for(i = 0; i < container->n_runs; i++) {
+ uint32_t startpoint = container->runs[i].value;
+ uint32_t length = container->runs[i].length;
+ uint32_t endpoint = length + startpoint;
+ if (x <= endpoint) {
+ if (x < startpoint) break;
+ return sum + (x32 - startpoint);
+ } else {
+ sum += length + 1;
+ }
+ }
+ return sum - 1;
+ } else {
+ return -1;
+ }
+}
+
+#if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512
+
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+/* Get the cardinality of `run'. Requires an actual computation. */
+static inline int _avx512_run_container_cardinality(
+ const run_container_t *run) {
+ const int32_t n_runs = run->n_runs;
+ const rle16_t *runs = run->runs;
+
+ /* by initializing with n_runs, we omit counting the +1 for each pair. */
+ int sum = n_runs;
+ int32_t k = 0;
+ const int32_t step = sizeof(__m512i) / sizeof(rle16_t);
+ if (n_runs > step) {
+ __m512i total = _mm512_setzero_si512();
+ for (; k + step <= n_runs; k += step) {
+ __m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k));
+ __m512i justlengths = _mm512_srli_epi32(ymm1, 16);
+ total = _mm512_add_epi32(total, justlengths);
+ }
+
+ __m256i lo = _mm512_extracti32x8_epi32(total, 0);
+ __m256i hi = _mm512_extracti32x8_epi32(total, 1);
+
+ // a store might be faster than extract?
+ uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
+ _mm256_storeu_si256((__m256i *)buffer, lo);
+ sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+ (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+
+ _mm256_storeu_si256((__m256i *)buffer, hi);
+ sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+ (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+ }
+ for (; k < n_runs; ++k) {
+ sum += runs[k].length;
+ }
+
+ return sum;
+}
-#ifdef CROARING_IS_X64
+CROARING_UNTARGET_AVX512
CROARING_TARGET_AVX2
ALLOW_UNALIGNED
@@ -15148,16 +18866,17 @@ static inline int _avx2_run_container_cardinality(const run_container_t *run) {
return sum;
}
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
/* Get the cardinality of `run'. Requires an actual computation. */
-static inline int _scalar_run_container_cardinality(const run_container_t *run) {
+static inline int _scalar_run_container_cardinality(
+ const run_container_t *run) {
const int32_t n_runs = run->n_runs;
const rle16_t *runs = run->runs;
/* by initializing with n_runs, we omit counting the +1 for each pair. */
int sum = n_runs;
- for (int k = 0; k < n_runs; ++k) {
+ int k; for(k = 0; k < n_runs; ++k) {
sum += runs[k].length;
}
@@ -15165,11 +18884,16 @@ static inline int _scalar_run_container_cardinality(const run_container_t *run)
}
int run_container_cardinality(const run_container_t *run) {
- if( croaring_avx2() ) {
- return _avx2_run_container_cardinality(run);
- } else {
- return _scalar_run_container_cardinality(run);
- }
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
+ return _avx512_run_container_cardinality(run);
+ } else
+#endif
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ return _avx2_run_container_cardinality(run);
+ } else {
+ return _scalar_run_container_cardinality(run);
+ }
}
#else
@@ -15180,7 +18904,7 @@ int run_container_cardinality(const run_container_t *run) {
/* by initializing with n_runs, we omit counting the +1 for each pair. */
int sum = n_runs;
- int k; for ( k = 0; k < n_runs; ++k) {
+ int k; for(k = 0; k < n_runs; ++k) {
sum += runs[k].length;
}
@@ -15188,22 +18912,365 @@ int run_container_cardinality(const run_container_t *run) {
}
#endif
+#ifdef __cplusplus
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
+#endif
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif/* end file src/containers/run.c */
+/* begin file src/isadetection.c */
+
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
+America and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Binaries produced by Visual Studio with solely AVX2 routines
+// can compile to AVX-512 thus causing crashes on non-AVX-512 systems.
+// This appears to affect VS 17.8 and 17.9. We disable AVX-512 and AVX2
+// on these systems. It seems that ClangCL is not affected.
+// https://github.com/RoaringBitmap/CRoaring/pull/603
+#ifndef __clang__
+#if _MSC_VER >= 1938
+#define ROARING_DISABLE_AVX 1
+#endif // _MSC_VER >= 1938
+#endif // __clang__
+
+// We need portability.h to be included first, see
+// https://github.com/RoaringBitmap/CRoaring/issues/394
+#if CROARING_REGULAR_VISUAL_STUDIO
+#include <intrin.h>
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+#include <cpuid.h>
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+namespace roaring {
+namespace internal {
+#endif
+enum croaring_instruction_set {
+ CROARING_DEFAULT = 0x0,
+ CROARING_NEON = 0x1,
+ CROARING_AVX2 = 0x4,
+ CROARING_SSE42 = 0x8,
+ CROARING_PCLMULQDQ = 0x10,
+ CROARING_BMI1 = 0x20,
+ CROARING_BMI2 = 0x40,
+ CROARING_ALTIVEC = 0x80,
+ CROARING_AVX512F = 0x100,
+ CROARING_AVX512DQ = 0x200,
+ CROARING_AVX512BW = 0x400,
+ CROARING_AVX512VBMI2 = 0x800,
+ CROARING_AVX512BITALG = 0x1000,
+ CROARING_AVX512VPOPCNTDQ = 0x2000,
+ CROARING_UNINITIALIZED = 0x8000
+};
+
+#if CROARING_COMPILER_SUPPORTS_AVX512
+unsigned int CROARING_AVX512_REQUIRED =
+ (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW |
+ CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
+#endif
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
+ uint32_t *edx) {
+#if CROARING_REGULAR_VISUAL_STUDIO
+ int cpu_info[4];
+ __cpuidex(cpu_info, *eax, *ecx);
+ *eax = cpu_info[0];
+ *ebx = cpu_info[1];
+ *ecx = cpu_info[2];
+ *edx = cpu_info[3];
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+ uint32_t level = *eax;
+ __get_cpuid(level, eax, ebx, ecx, edx);
+#else
+ uint32_t a = *eax, b, c = *ecx, d;
+ __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
+ *eax = a;
+ *ebx = b;
+ *ecx = c;
+ *edx = d;
+#endif
+}
+
+static inline uint64_t xgetbv(void) {
+#if defined(_MSC_VER)
+ return _xgetbv(0);
+#else
+ uint32_t xcr0_lo, xcr0_hi;
+ __asm__("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
+ return xcr0_lo | ((uint64_t)xcr0_hi << 32);
+#endif
+}
+
+/**
+ * This is a relatively expensive function but it will get called at most
+ * *once* per compilation unit. Normally, the CRoaring library is built
+ * as one compilation unit.
+ */
+static inline uint32_t dynamic_croaring_detect_supported_architectures(void) {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t host_isa = 0x0;
+ // Can be found on Intel ISA Reference for CPUID
+ static uint32_t cpuid_avx2_bit =
+ 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi1_bit =
+ 1 << 3; ///< @private bit 3 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi2_bit =
+ 1 << 8; ///< @private bit 8 of EBX for EAX=0x7
+ static uint32_t cpuid_avx512f_bit =
+ 1 << 16; ///< @private bit 16 of EBX for EAX=0x7
+ static uint32_t cpuid_avx512dq_bit =
+ 1 << 17; ///< @private bit 17 of EBX for EAX=0x7
+ static uint32_t cpuid_avx512bw_bit =
+ 1 << 30; ///< @private bit 30 of EBX for EAX=0x7
+ static uint32_t cpuid_avx512vbmi2_bit =
+ 1 << 6; ///< @private bit 6 of ECX for EAX=0x7
+ static uint32_t cpuid_avx512bitalg_bit =
+ 1 << 12; ///< @private bit 12 of ECX for EAX=0x7
+ static uint32_t cpuid_avx512vpopcntdq_bit =
+ 1 << 14; ///< @private bit 14 of ECX for EAX=0x7
+ static uint64_t cpuid_avx256_saved = 1 << 2; ///< @private bit 2 = AVX
+ static uint64_t cpuid_avx512_saved =
+ 7 << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
+ static uint32_t cpuid_sse42_bit =
+ 1 << 20; ///< @private bit 20 of ECX for EAX=0x1
+ static uint32_t cpuid_osxsave =
+ (1 << 26) | (1 << 27); ///< @private bits 26+27 of ECX for EAX=0x1
+ static uint32_t cpuid_pclmulqdq_bit =
+ 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
+
+ // EBX for EAX=0x1
+ eax = 0x1;
+ ecx = 0x0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (ecx & cpuid_sse42_bit) {
+ host_isa |= CROARING_SSE42;
+ } else {
+ return host_isa; // everything after is redundant
+ }
+
+ if (ecx & cpuid_pclmulqdq_bit) {
+ host_isa |= CROARING_PCLMULQDQ;
+ }
+
+ if ((ecx & cpuid_osxsave) != cpuid_osxsave) {
+ return host_isa;
+ }
+
+ // xgetbv for checking if the OS saves registers
+ uint64_t xcr0 = xgetbv();
+
+ if ((xcr0 & cpuid_avx256_saved) == 0) {
+ return host_isa;
+ }
+ // ECX for EAX=0x7
+ eax = 0x7;
+ ecx = 0x0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx & cpuid_avx2_bit) {
+ host_isa |= CROARING_AVX2;
+ }
+ if (ebx & cpuid_bmi1_bit) {
+ host_isa |= CROARING_BMI1;
+ }
+
+ if (ebx & cpuid_bmi2_bit) {
+ host_isa |= CROARING_BMI2;
+ }
+
+ if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) {
+ return host_isa;
+ }
+
+ if (ebx & cpuid_avx512f_bit) {
+ host_isa |= CROARING_AVX512F;
+ }
+
+ if (ebx & cpuid_avx512bw_bit) {
+ host_isa |= CROARING_AVX512BW;
+ }
+
+ if (ebx & cpuid_avx512dq_bit) {
+ host_isa |= CROARING_AVX512DQ;
+ }
+
+ if (ecx & cpuid_avx512vbmi2_bit) {
+ host_isa |= CROARING_AVX512VBMI2;
+ }
+
+ if (ecx & cpuid_avx512bitalg_bit) {
+ host_isa |= CROARING_AVX512BITALG;
+ }
+
+ if (ecx & cpuid_avx512vpopcntdq_bit) {
+ host_isa |= CROARING_AVX512VPOPCNTDQ;
+ }
+
+ return host_isa;
+}
+
+#endif // end SIMD extension detection code
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP  // caches the detection result in a function-local static
+static inline uint32_t croaring_detect_supported_architectures(void) {
+ // thread-safe as per the C++11 standard.
+ static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
+ return buffer;
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C  // same cache, stored in a C11 _Atomic
+static uint32_t croaring_detect_supported_architectures(void) {
+ // we use an atomic for thread safety
+ static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {
+ // atomicity is sufficient (check and store are separate steps; no lock is taken)
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#else
+// If we do not have atomics, we do the best we can (plain static, no synchronization).
+static inline uint32_t croaring_detect_supported_architectures(void) {
+ static uint32_t buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {  // first call: run the detection and cache it
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#endif // CROARING_C_ATOMIC
+
+#ifdef ROARING_DISABLE_AVX  // build-time opt-out: no ROARING_SUPPORTS_* bit is ever reported
+
+int croaring_hardware_support(void) { return 0; }
+
+#elif defined(__AVX512F__) && defined(__AVX512DQ__) && \
+ defined(__AVX512BW__) && defined(__AVX512VBMI2__) && \
+ defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
+int croaring_hardware_support(void) {  // all listed AVX-512 macros defined: no runtime check
+ return ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512;
+}
+#elif defined(__AVX2__)  // AVX2 is reported unconditionally; only AVX-512 is detected at run time
+
+int croaring_hardware_support(void) {
+ static
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+ _Atomic
+#endif
+ int support = 0xFFFFFFF;  // sentinel: result not computed yet
+ if (support == 0xFFFFFFF) {
+ bool avx512_support = false;
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ avx512_support =
+ ((croaring_detect_supported_architectures() &
+ CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED);  // every required bit must be present
+#endif
+ support = ROARING_SUPPORTS_AVX2 |
+ (avx512_support ? ROARING_SUPPORTS_AVX512 : 0);
+ }
+ return support;
+}
+#else  // neither case above: both AVX2 and AVX-512 are detected at run time
+
+int croaring_hardware_support(void) {
+ static
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+ _Atomic
+#endif
+ int support = 0xFFFFFFF;  // sentinel: result not computed yet
+ if (support == 0xFFFFFFF) {
+ bool has_avx2 = (croaring_detect_supported_architectures() &
+ CROARING_AVX2) == CROARING_AVX2;
+ bool has_avx512 = false;  // stays false when the compiler lacks AVX-512 support
+#if CROARING_COMPILER_SUPPORTS_AVX512
+ has_avx512 = (croaring_detect_supported_architectures() &
+ CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED;
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+ support = (has_avx2 ? ROARING_SUPPORTS_AVX2 : 0) |
+ (has_avx512 ? ROARING_SUPPORTS_AVX512 : 0);
+ }
+ return support;
+}
+#endif
+
+#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
-/* end file src/containers/run.c */
+/* end file src/isadetection.c */
/* begin file src/memory.c */
#include <stdlib.h>
+
// without the following, we get lots of warnings about posix_memalign
#ifndef __cplusplus
-extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
+extern int posix_memalign(void** __memptr, size_t __alignment, size_t __size);
#endif //__cplusplus // C++ does not have a well defined signature
// portable version of posix_memalign
-static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
- void *p;
+static void* roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
+ void* p;
#ifdef _MSC_VER
p = _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
@@ -15216,7 +19283,7 @@ static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
return p;
}
-static void roaring_bitmap_aligned_free(void *memblock) {
+static void roaring_bitmap_aligned_free(void* memblock) {
#ifdef _MSC_VER
_aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__)
@@ -15239,9 +19306,7 @@ void roaring_init_memory_hook(roaring_memory_t memory_hook) {
global_memory_hook = memory_hook;
}
-void* roaring_malloc(size_t n) {
- return global_memory_hook.malloc(n);
-}
+void* roaring_malloc(size_t n) { return global_memory_hook.malloc(n); }
void* roaring_realloc(void* p, size_t new_sz) {
return global_memory_hook.realloc(p, new_sz);
@@ -15251,39 +19316,46 @@ void* roaring_calloc(size_t n_elements, size_t element_size) {
return global_memory_hook.calloc(n_elements, element_size);
}
-void roaring_free(void* p) {
- global_memory_hook.free(p);
-}
+void roaring_free(void* p) { global_memory_hook.free(p); }
void* roaring_aligned_malloc(size_t alignment, size_t size) {
return global_memory_hook.aligned_malloc(alignment, size);
}
-void roaring_aligned_free(void* p) {
- global_memory_hook.aligned_free(p);
-}
+void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); }
/* end file src/memory.c */
/* begin file src/roaring.c */
#include <assert.h>
+#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
-#include <inttypes.h>
+// Include after roaring.h
#ifdef __cplusplus
using namespace ::roaring::internal;
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
#endif
#define CROARING_SERIALIZATION_ARRAY_UINT32 1
#define CROARING_SERIALIZATION_CONTAINER 2
-
-extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
-extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
+extern inline int roaring_trailing_zeroes(unsigned long long input_num);
+extern inline int roaring_leading_zeroes(unsigned long long input_num);
+extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r);
+extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r);
+extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r,
+ bool cow);
+extern inline roaring_bitmap_t *roaring_bitmap_create(void);
+extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min,
+ uint64_t max);
+extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
+ uint64_t min, uint64_t max);
static inline bool is_cow(const roaring_bitmap_t *r) {
return r->high_low_container.flags & ROARING_FLAG_COW;
@@ -15297,17 +19369,17 @@ static inline bool is_frozen(const roaring_bitmap_t *r) {
// that we can recover the container touched, which, in turn can be used to
// accelerate some functions (when you repeatedly need to add to the same
// container)
-static inline container_t *containerptr_roaring_bitmap_add(
- roaring_bitmap_t *r, uint32_t val,
- uint8_t *type, int *index
-){
+static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r,
+ uint32_t val,
+ uint8_t *type,
+ int *index) {
roaring_array_t *ra = &r->high_low_container;
uint16_t hb = val >> 16;
const int i = ra_get_index(ra, hb);
if (i >= 0) {
- ra_unshare_container_at_index(ra, i);
- container_t *c = ra_get_container_at_index(ra, i, type);
+ ra_unshare_container_at_index(ra, (uint16_t)i);
+ container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type);
uint8_t new_type = *type;
container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
*index = i;
@@ -15321,8 +19393,8 @@ static inline container_t *containerptr_roaring_bitmap_add(
}
} else {
array_container_t *new_ac = array_container_create();
- container_t *c = container_add(new_ac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, type);
+ container_t *c =
+ container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type);
// we could just assume that it stays an array container
ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
*index = -i - 1;
@@ -15355,8 +19427,8 @@ static inline void add_bulk_impl(roaring_bitmap_t *r,
if (context->container == NULL || context->key != key) {
uint8_t typecode;
int idx;
- context->container = containerptr_roaring_bitmap_add(
- r, val, &typecode, &idx);
+ context->container =
+ containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
context->typecode = typecode;
context->idx = idx;
context->key = key;
@@ -15394,7 +19466,8 @@ void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
container_t *container;
val = *current_val;
container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
- roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode};
+ roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16),
+ typecode};
for (; current_val != end; current_val++) {
memcpy(&val, current_val, sizeof(val));
@@ -15409,8 +19482,7 @@ void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
roaring_bulk_context_t *context,
- uint32_t val)
-{
+ uint32_t val) {
uint16_t key = val >> 16;
if (context->container == NULL || context->key != key) {
int32_t start_idx = -1;
@@ -15422,17 +19494,21 @@ bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
return false;
}
uint8_t typecode;
- context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode);
+ context->container = ra_get_container_at_index(
+ &r->high_low_container, (uint16_t)idx, &typecode);
context->typecode = typecode;
context->idx = idx;
- context->key = ra_get_key_at_index(&r->high_low_container, idx);
- // ra_advance_until finds the next key >= the target, we found a later container.
+ context->key =
+ ra_get_key_at_index(&r->high_low_container, (uint16_t)idx);
+ // ra_advance_until finds the next key >= the target, we found a later
+ // container.
if (context->key != key) {
return false;
}
}
// context is now set up
- return container_contains(context->container, val & 0xFFFF, context->typecode);
+ return container_contains(context->container, val & 0xFFFF,
+ context->typecode);
}
roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
@@ -15446,11 +19522,10 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
// include long lists
roaring_bitmap_t *answer = roaring_bitmap_create();
roaring_bulk_context_t context;
- va_list ap;
-
memset(&context, 0, sizeof(context));
+ va_list ap;
va_start(ap, n_args);
- size_t i; for ( i = 0; i < n_args; i++) {
+ size_t i; for(i = 0; i < n_args; i++) {
uint32_t val = va_arg(ap, uint32_t);
roaring_bitmap_add_bulk(answer, &context, val);
}
@@ -15458,24 +19533,20 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
return answer;
}
-static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
- return (a < b) ? a : b;
-}
-
static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
return (a < b) ? a : b;
}
roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
uint32_t step) {
- if(max >= UINT64_C(0x100000000)) {
+ if (max >= UINT64_C(0x100000000)) {
max = UINT64_C(0x100000000);
}
if (step == 0) return NULL;
if (max <= min) return NULL;
roaring_bitmap_t *answer = roaring_bitmap_create();
if (step >= (1 << 16)) {
- uint32_t value; for ( value = (uint32_t)min; value < max; value += step) {
+ uint32_t value; for(value = (uint32_t)min; value < max; value += step) {
roaring_bitmap_add(answer, value);
}
return answer;
@@ -15484,11 +19555,12 @@ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
do {
uint32_t key = (uint32_t)min_tmp >> 16;
uint32_t container_min = min_tmp & 0xFFFF;
- uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
+ uint32_t container_max =
+ (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
uint8_t type;
- container_t *container = container_from_range(&type, container_min,
- container_max, (uint16_t)step);
- ra_append(&answer->high_low_container, key, container, type);
+ container_t *container = container_from_range(
+ &type, container_min, container_max, (uint16_t)step);
+ ra_append(&answer->high_low_container, (uint16_t)key, container, type);
uint32_t gap = container_max - container_min + step - 1;
min_tmp += gap - (gap % step);
} while (min_tmp < max);
@@ -15496,7 +19568,8 @@ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
return answer;
}
-void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min,
+ uint32_t max) {
if (min > max) {
return;
}
@@ -15507,9 +19580,10 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t
uint32_t max_key = max >> 16;
int32_t num_required_containers = max_key - min_key + 1;
- int32_t suffix_length = count_greater(ra->keys, ra->size, max_key);
- int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length,
- min_key);
+ int32_t suffix_length =
+ count_greater(ra->keys, ra->size, (uint16_t)max_key);
+ int32_t prefix_length =
+ count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key);
int32_t common_length = ra->size - prefix_length - suffix_length;
if (num_required_containers > common_length) {
@@ -15519,34 +19593,34 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t
int32_t src = prefix_length + common_length - 1;
int32_t dst = ra->size - suffix_length - 1;
- uint32_t key; for ( key = max_key; key != min_key-1; key--) { // beware of min_key==0
+ uint32_t key; for(key = max_key; key != min_key - 1;
+ key--) { // beware of min_key==0
uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
- container_t* new_container;
+ container_t *new_container;
uint8_t new_type;
if (src >= 0 && ra->keys[src] == key) {
- ra_unshare_container_at_index(ra, src);
- new_container = container_add_range(ra->containers[src],
- ra->typecodes[src],
- container_min, container_max,
- &new_type);
+ ra_unshare_container_at_index(ra, (uint16_t)src);
+ new_container =
+ container_add_range(ra->containers[src], ra->typecodes[src],
+ container_min, container_max, &new_type);
if (new_container != ra->containers[src]) {
- container_free(ra->containers[src],
- ra->typecodes[src]);
+ container_free(ra->containers[src], ra->typecodes[src]);
}
src--;
} else {
new_container = container_from_range(&new_type, container_min,
- container_max+1, 1);
+ container_max + 1, 1);
}
- ra_replace_key_and_container_at_index(ra, dst, key, new_container,
- new_type);
+ ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key,
+ new_container, new_type);
dst--;
}
}
-void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min,
+ uint32_t max) {
if (min > max) {
return;
}
@@ -15556,21 +19630,21 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint3
uint32_t min_key = min >> 16;
uint32_t max_key = max >> 16;
- int32_t src = count_less(ra->keys, ra->size, min_key);
+ int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key);
int32_t dst = src;
while (src < ra->size && ra->keys[src] <= max_key) {
- uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
- uint32_t container_max = (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
- ra_unshare_container_at_index(ra, src);
+ uint32_t container_min =
+ (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
+ uint32_t container_max =
+ (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
+ ra_unshare_container_at_index(ra, (uint16_t)src);
container_t *new_container;
uint8_t new_type;
- new_container = container_remove_range(ra->containers[src],
- ra->typecodes[src],
- container_min, container_max,
- &new_type);
+ new_container =
+ container_remove_range(ra->containers[src], ra->typecodes[src],
+ container_min, container_max, &new_type);
if (new_container != ra->containers[src]) {
- container_free(ra->containers[src],
- ra->typecodes[src]);
+ container_free(ra->containers[src], ra->typecodes[src]);
}
if (new_container) {
ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
@@ -15584,15 +19658,12 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint3
}
}
-extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-
-#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
void roaring_bitmap_printf(const roaring_bitmap_t *r) {
const roaring_array_t *ra = &r->high_low_container;
printf("{");
- int i; for (i = 0; i < ra->size; ++i) {
+ int i; for(i = 0; i < ra->size; ++i) {
container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i],
((uint32_t)ra->keys[i]) << 16);
@@ -15607,14 +19678,14 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
const roaring_array_t *ra = &r->high_low_container;
printf("{");
- int i; for (i = 0; i < ra->size; ++i) {
+ int i; for(i = 0; i < ra->size; ++i) {
printf("%d: %s (%d)", ra->keys[i],
get_full_container_name(ra->containers[i], ra->typecodes[i]),
container_get_cardinality(ra->containers[i], ra->typecodes[i]));
if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
- printf(
- "(shared count = %" PRIu32 " )",
- CAST_shared(ra->containers[i])->counter);
+ printf("(shared count = %" PRIu32 " )",
+ croaring_refcount_get(
+ &(CAST_shared(ra->containers[i])->counter)));
}
if (i + 1 < ra->size) {
@@ -15624,7 +19695,7 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
printf("}");
}
#endif
-
+
typedef struct min_max_sum_s {
uint32_t min;
uint32_t max;
@@ -15640,9 +19711,9 @@ static bool min_max_sum_fnc(uint32_t value, void *param) {
}
/**
-* (For advanced users.)
-* Collect statistics about the bitmap
-*/
+ * (For advanced users.)
+ * Collect statistics about the bitmap
+ */
void roaring_bitmap_statistics(const roaring_bitmap_t *r,
roaring_statistics_t *stat) {
const roaring_array_t *ra = &r->high_low_container;
@@ -15659,7 +19730,7 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
stat->max_value = mms.max;
stat->sum_value = mms.sum;
- int i; for (i = 0; i < ra->size; ++i) {
+ int i; for(i = 0; i < ra->size; ++i) {
uint8_t truetype =
get_container_type(ra->containers[i], ra->typecodes[i]);
uint32_t card =
@@ -15684,11 +19755,84 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
break;
default:
assert(false);
- __builtin_unreachable();
+ roaring_unreachable;
}
}
}
+/*
+ * Checks that (container-level checks go through container_internal_validate):
+ * - Array containers are sorted and contain no duplicates
+ * - Range containers are sorted and contain no overlapping ranges
+ * - Roaring containers are sorted by key and there are no duplicate keys
+ * - The correct container type is used for each container (e.g. bitmaps aren't
+ * used for small containers)
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r,
+ const char **reason) {  // reason may be NULL; returns true when every check passes
+ const char *reason_local;
+ if (reason == NULL) {
+ // Always allow assigning through *reason
+ reason = &reason_local;
+ }
+ *reason = NULL;  // start with no failure message
+ const roaring_array_t *ra = &r->high_low_container;
+ if (ra->size < 0) {
+ *reason = "negative size";
+ return false;
+ }
+ if (ra->allocation_size < 0) {
+ *reason = "negative allocation size";
+ return false;
+ }
+ if (ra->size > ra->allocation_size) {
+ *reason = "more containers than allocated space";
+ return false;
+ }
+ if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) {  // any bit other than these two is rejected
+ *reason = "invalid flags";
+ return false;
+ }
+ if (ra->size == 0) {
+ return true;  // empty bitmap: the array pointers below are not inspected
+ }
+
+ if (ra->keys == NULL) {
+ *reason = "keys is NULL";
+ return false;
+ }
+ if (ra->typecodes == NULL) {
+ *reason = "typecodes is NULL";
+ return false;
+ }
+ if (ra->containers == NULL) {
+ *reason = "containers is NULL";
+ return false;
+ }
+
+ uint32_t prev_key = ra->keys[0];
+ int32_t i; for(i = 1; i < ra->size; ++i) {
+ if (ra->keys[i] <= prev_key) {  // equal keys are rejected too (strict ordering)
+ *reason = "keys not strictly increasing";
+ return false;
+ }
+ prev_key = ra->keys[i];
+ }
+
+ for(i = 0; i < ra->size; ++i) {
+ if (!container_internal_validate(ra->containers[i], ra->typecodes[i],
+ reason)) {
+ // reason should already be set
+ if (*reason == NULL) {
+ *reason = "container failed to validate but no reason given";
+ }
+ return false;
+ }
+ }
+
+ return true;
+}
+
roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
roaring_bitmap_t *ans =
(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
@@ -15696,14 +19840,12 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
return NULL;
}
if (!ra_init_with_capacity( // allocation of list of containers can fail
- &ans->high_low_container, r->high_low_container.size)
- ){
+ &ans->high_low_container, r->high_low_container.size)) {
roaring_free(ans);
return NULL;
}
if (!ra_overwrite( // memory allocation of individual containers may fail
- &r->high_low_container, &ans->high_low_container, is_cow(r))
- ){
+ &r->high_low_container, &ans->high_low_container, is_cow(r))) {
roaring_bitmap_free(ans); // overwrite should leave in freeable state
return NULL;
}
@@ -15712,21 +19854,24 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
}
bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
- const roaring_bitmap_t *src) {
+ const roaring_bitmap_t *src) {
roaring_bitmap_set_copy_on_write(dest, is_cow(src));
return ra_overwrite(&src->high_low_container, &dest->high_low_container,
is_cow(src));
}
void roaring_bitmap_free(const roaring_bitmap_t *r) {
+ if (r == NULL) {
+ return;
+ }
if (!is_frozen(r)) {
- ra_clear((roaring_array_t*)&r->high_low_container);
+ ra_clear((roaring_array_t *)&r->high_low_container);
}
- roaring_free((roaring_bitmap_t*)r);
+ roaring_free((roaring_bitmap_t *)r);
}
void roaring_bitmap_clear(roaring_bitmap_t *r) {
- ra_reset(&r->high_low_container);
+ ra_reset(&r->high_low_container);
}
void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
@@ -15736,9 +19881,9 @@ void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
const int i = ra_get_index(ra, hb);
uint8_t typecode;
if (i >= 0) {
- ra_unshare_container_at_index(ra, i);
+ ra_unshare_container_at_index(ra, (uint16_t)i);
container_t *container =
- ra_get_container_at_index(ra, i, &typecode);
+ ra_get_container_at_index(ra, (uint16_t)i, &typecode);
uint8_t newtypecode = typecode;
container_t *container2 =
container_add(container, val & 0xFFFF, typecode, &newtypecode);
@@ -15749,8 +19894,8 @@ void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
}
} else {
array_container_t *newac = array_container_create();
- container_t *container = container_add(newac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, &typecode);
+ container_t *container =
+ container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
// we could just assume that it stays an array container
ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
container, typecode);
@@ -15763,9 +19908,9 @@ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
uint8_t typecode;
bool result = false;
if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
+ container_t *container = ra_get_container_at_index(
+ &r->high_low_container, (uint16_t)i, &typecode);
const int oldCardinality =
container_get_cardinality(container, typecode);
@@ -15786,8 +19931,8 @@ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
}
} else {
array_container_t *newac = array_container_create();
- container_t *container = container_add(newac, val & 0xFFFF,
- ARRAY_CONTAINER_TYPE, &typecode);
+ container_t *container =
+ container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
// we could just assume that it stays an array container
ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
container, typecode);
@@ -15802,9 +19947,9 @@ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
const int i = ra_get_index(&r->high_low_container, hb);
uint8_t typecode;
if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
+ container_t *container = ra_get_container_at_index(
+ &r->high_low_container, (uint16_t)i, &typecode);
uint8_t newtypecode = typecode;
container_t *container2 =
container_remove(container, val & 0xFFFF, typecode, &newtypecode);
@@ -15828,9 +19973,9 @@ bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
uint8_t typecode;
bool result = false;
if (i >= 0) {
- ra_unshare_container_at_index(&r->high_low_container, i);
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
+ container_t *container = ra_get_container_at_index(
+ &r->high_low_container, (uint16_t)i, &typecode);
const int oldCardinality =
container_get_cardinality(container, typecode);
@@ -15864,8 +20009,9 @@ void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
if (n_args == 0 || r->high_low_container.size == 0) {
return;
}
- int32_t pos = -1; // position of the container used in the previous iteration
- size_t i; for ( i = 0; i < n_args; i++) {
+ int32_t pos =
+ -1; // position of the container used in the previous iteration
+ size_t i; for(i = 0; i < n_args; i++) {
uint16_t key = (uint16_t)(vals[i] >> 16);
if (pos < 0 || key != r->high_low_container.keys[pos]) {
pos = ra_get_index(&r->high_low_container, key);
@@ -15873,10 +20019,9 @@ void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
if (pos >= 0) {
uint8_t new_typecode;
container_t *new_container;
- new_container = container_remove(r->high_low_container.containers[pos],
- vals[i] & 0xffff,
- r->high_low_container.typecodes[pos],
- &new_typecode);
+ new_container = container_remove(
+ r->high_low_container.containers[pos], vals[i] & 0xffff,
+ r->high_low_container.typecodes[pos], &new_typecode);
if (new_container != r->high_low_container.containers[pos]) {
container_free(r->high_low_container.containers[pos],
r->high_low_container.typecodes[pos]);
@@ -15906,15 +20051,17 @@ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
int pos1 = 0, pos2 = 0;
while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ const uint16_t s1 =
+ ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ const uint16_t s2 =
+ ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
if (s1 == s2) {
uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
container_t *c = container_and(c1, type1, c2, type2, &result_type);
if (container_nonzero_cardinality(c, result_type)) {
@@ -15946,7 +20093,7 @@ roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
}
roaring_bitmap_t *answer =
roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
- size_t i; for ( i = 2; i < number; i++) {
+ size_t i; for(i = 2; i < number; i++) {
roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
}
roaring_bitmap_repair_after_lazy(answer);
@@ -15965,7 +20112,7 @@ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
return roaring_bitmap_copy(x[0]);
}
roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
- size_t i; for ( i = 2; i < number; i++) {
+ size_t i; for(i = 2; i < number; i++) {
roaring_bitmap_lazy_xor_inplace(answer, x[i]);
}
roaring_bitmap_repair_after_lazy(answer);
@@ -15983,21 +20130,23 @@ void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
// any skipped-over or newly emptied containers in x1
// have to be freed.
while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ const uint16_t s1 =
+ ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ const uint16_t s2 =
+ ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
if (s1 == s2) {
uint8_t type1, type2, result_type;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
-
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared
+ // container. Rationale: using a shared container safely with in
+ // place computation would require making a copy and then doing the
+ // computation in place which is likely less efficient than avoiding
+ // in place entirely and always generating a new container.
container_t *c =
(type1 == SHARED_CONTAINER_TYPE)
? container_and(c1, type1, c2, type2, &result_type)
@@ -16052,14 +20201,14 @@ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
container_t *c = container_or(c1, type1, c2, type2, &result_type);
// since we assume that the initial containers are non-empty, the
@@ -16070,12 +20219,12 @@ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
// c1 = container_clone(c1, type1);
c1 = get_copy_of_container(c1, &type1, is_cow(x1));
if (is_cow(x1)) {
@@ -16085,11 +20234,11 @@ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s1, c1, type1);
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
// c2 = container_clone(c2, type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
@@ -16099,7 +20248,7 @@ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s2, c2, type2);
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -16129,15 +20278,15 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
}
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
if (!container_is_full(c1, type1)) {
container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ &x2->high_low_container, (uint16_t)pos2, &type2);
container_t *c =
(type1 == SHARED_CONTAINER_TYPE)
? container_or(c1, type1, c2, type2, &result_type)
@@ -16154,17 +20303,17 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
- pos2, &type2);
+ (uint16_t)pos2, &type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
ra_set_container_at_index(&x2->high_low_container, pos2, c2,
@@ -16178,7 +20327,7 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
length1++;
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -16203,14 +20352,14 @@ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
container_t *c = container_xor(c1, type1, c2, type2, &result_type);
if (container_nonzero_cardinality(c, result_type)) {
@@ -16222,12 +20371,12 @@ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
c1 = get_copy_of_container(c1, &type1, is_cow(x1));
if (is_cow(x1)) {
ra_set_container_at_index(&x1->high_low_container, pos1, c1,
@@ -16236,11 +20385,11 @@ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s1, c1, type1);
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
ra_set_container_at_index(&x2->high_low_container, pos2, c2,
@@ -16249,7 +20398,7 @@ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s2, c2, type2);
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -16285,27 +20434,26 @@ void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
+ // We do the computation "in place" only when c1 is not a shared
+ // container. Rationale: using a shared container safely with in
+ // place computation would require making a copy and then doing the
+ // computation in place which is likely less efficient than avoiding
+ // in place entirely and always generating a new container.
container_t *c;
if (type1 == SHARED_CONTAINER_TYPE) {
c = container_xor(c1, type1, c2, type2, &result_type);
shared_container_free(CAST_shared(c1)); // so release
- }
- else {
+ } else {
c = container_ixor(c1, type1, c2, type2, &result_type);
}
@@ -16322,17 +20470,17 @@ void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
ra_set_container_at_index(&x2->high_low_container, pos2, c2,
@@ -16345,7 +20493,7 @@ void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
length1++;
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -16361,7 +20509,8 @@ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
length2 = x2->high_low_container.size;
if (0 == length1) {
roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
- roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2));
+ roaring_bitmap_set_copy_on_write(empty_bitmap,
+ is_cow(x1) || is_cow(x2));
return empty_bitmap;
}
if (0 == length2) {
@@ -16375,16 +20524,16 @@ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
uint16_t s1 = 0;
uint16_t s2 = 0;
while (true) {
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_andnot(c1, type1, c2, type2,
- &result_type);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
+ container_t *c =
+ container_andnot(c1, type1, c2, type2, &result_type);
if (container_nonzero_cardinality(c, result_type)) {
ra_append(&answer->high_low_container, s1, c, result_type);
@@ -16438,27 +20587,26 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
+ // We do the computation "in place" only when c1 is not a shared
+ // container. Rationale: using a shared container safely with in
+ // place computation would require making a copy and then doing the
+ // computation in place which is likely less efficient than avoiding
+ // in place entirely and always generating a new container.
container_t *c;
if (type1 == SHARED_CONTAINER_TYPE) {
c = container_andnot(c1, type1, c2, type2, &result_type);
shared_container_free(CAST_shared(c1)); // release
- }
- else {
+ } else {
c = container_iandnot(c1, type1, c2, type2, &result_type);
}
@@ -16474,27 +20622,26 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
if (pos1 != intersection_size) {
container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ &x1->high_low_container, (uint16_t)pos1, &type1);
- ra_replace_key_and_container_at_index(&x1->high_low_container,
- intersection_size, s1, c1,
- type1);
+ ra_replace_key_and_container_at_index(
+ &x1->high_low_container, intersection_size, s1, c1, type1);
}
intersection_size++;
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
@@ -16520,7 +20667,7 @@ uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
const roaring_array_t *ra = &r->high_low_container;
uint64_t card = 0;
- int i; for (i = 0; i < ra->size; ++i)
+ int i; for(i = 0; i < ra->size; ++i)
card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
return card;
}
@@ -16536,11 +20683,11 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
if (range_start >= range_end) {
return 0;
}
- range_end--; // make range_end inclusive
+ range_end--; // make range_end inclusive
// now we have: 0 <= range_start <= range_end <= UINT32_MAX
- uint16_t minhb = range_start >> 16;
- uint16_t maxhb = range_end >> 16;
+ uint16_t minhb = (uint16_t)(range_start >> 16);
+ uint16_t maxhb = (uint16_t)(range_end >> 16);
uint64_t card = 0;
@@ -16550,8 +20697,8 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
card += container_rank(ra->containers[i], ra->typecodes[i],
range_end & 0xffff);
} else {
- card += container_get_cardinality(ra->containers[i],
- ra->typecodes[i]);
+ card +=
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]);
}
if ((range_start & 0xffff) != 0) {
card -= container_rank(ra->containers[i], ra->typecodes[i],
@@ -16565,8 +20712,8 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
for (; i < ra->size; i++) {
uint16_t key = ra->keys[i];
if (key < maxhb) {
- card += container_get_cardinality(ra->containers[i],
- ra->typecodes[i]);
+ card +=
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]);
} else if (key == maxhb) {
card += container_rank(ra->containers[i], ra->typecodes[i],
range_end & 0xffff);
@@ -16579,7 +20726,6 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
return card;
}
-
bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
return r->high_low_container.size == 0;
}
@@ -16588,9 +20734,8 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
ra_to_uint32_array(&r->high_low_container, ans);
}
-bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
- size_t offset, size_t limit,
- uint32_t *ans) {
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
+ size_t limit, uint32_t *ans) {
return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
}
@@ -16598,15 +20743,16 @@ bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
* efficient;
* also convert from run containers when more space efficient. Returns
* true if the result has at least one run container.
-*/
+ */
bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
bool answer = false;
- int i; for (i = 0; i < r->high_low_container.size; i++) {
+ int i; for(i = 0; i < r->high_low_container.size; i++) {
uint8_t type_original, type_after;
ra_unshare_container_at_index(
- &r->high_low_container, i); // TODO: this introduces extra cloning!
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
+ &r->high_low_container,
+ (uint16_t)i); // TODO: this introduces extra cloning!
+ container_t *c = ra_get_container_at_index(&r->high_low_container,
+ (uint16_t)i, &type_original);
container_t *c1 = convert_run_optimize(c, type_original, &type_after);
if (type_after == RUN_CONTAINER_TYPE) {
answer = true;
@@ -16618,10 +20764,10 @@ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
size_t answer = 0;
- int i; for (i = 0; i < r->high_low_container.size; i++) {
+ int i; for(i = 0; i < r->high_low_container.size; i++) {
uint8_t type_original;
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
+ container_t *c = ra_get_container_at_index(&r->high_low_container,
+ (uint16_t)i, &type_original);
answer += container_shrink_to_fit(c, type_original);
}
answer += ra_shrink_to_fit(&r->high_low_container);
@@ -16634,17 +20780,17 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
*/
bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
bool answer = false;
- int i; for (i = 0; i < r->high_low_container.size; i++) {
+ int i; for(i = 0; i < r->high_low_container.size; i++) {
uint8_t type_original, type_after;
- container_t *c = ra_get_container_at_index(&r->high_low_container, i,
- &type_original);
+ container_t *c = ra_get_container_at_index(&r->high_low_container,
+ (uint16_t)i, &type_original);
if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
answer = true;
if (type_original == SHARED_CONTAINER_TYPE) {
run_container_t *truec = CAST_run(CAST_shared(c)->container);
int32_t card = run_container_cardinality(truec);
container_t *c1 = convert_to_bitset_or_array_container(
- truec, card, &type_after);
+ truec, card, &type_after);
shared_container_free(CAST_shared(c)); // frees run as needed
ra_set_container_at_index(&r->high_low_container, i, c1,
type_after);
@@ -16652,7 +20798,7 @@ bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
} else {
int32_t card = run_container_cardinality(CAST_run(c));
container_t *c1 = convert_to_bitset_or_array_container(
- CAST_run(c), card, &type_after);
+ CAST_run(c), card, &type_after);
run_container_free(CAST_run(c));
ra_set_container_at_index(&r->high_low_container, i, c1,
type_after);
@@ -16680,25 +20826,30 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
- uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) +
- sizeof(uint32_t);
- return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
+ uint64_t sizeasarray =
+ roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t);
+ return portablesize < sizeasarray ? portablesize + 1
+ : (size_t)sizeasarray + 1;
}
size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
return ra_portable_size_in_bytes(&r->high_low_container);
}
-
-roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
+ size_t maxbytes) {
roaring_bitmap_t *ans =
(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
if (ans == NULL) {
return NULL;
}
size_t bytesread;
- bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
- if(is_ok) assert(bytesread <= maxbytes);
+ bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf,
+ maxbytes, &bytesread);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
roaring_bitmap_set_copy_on_write(ans, false);
if (!is_ok) {
roaring_free(ans);
@@ -16711,14 +20862,12 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
}
-
-size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
- return ra_portable_deserialize_size(buf, maxbytes);
+size_t roaring_bitmap_portable_deserialize_size(const char *buf,
+ size_t maxbytes) {
+ return ra_portable_deserialize_size(buf, maxbytes);
}
-
-size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
- char *buf) {
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) {
return ra_portable_serialize(&r->high_low_container, buf);
}
@@ -16727,37 +20876,82 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
/* This looks like a compressed set of uint32_t elements */
uint32_t card;
+
memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+
const uint32_t *elems =
(const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
+
roaring_bitmap_t *bitmap = roaring_bitmap_create();
if (bitmap == NULL) {
return NULL;
}
- roaring_bulk_context_t context;
-
- memset(&context, 0, sizeof(context));
- uint32_t i; for ( i = 0; i < card; i++) {
+ roaring_bulk_context_t context; memset(&context, 0, sizeof(context));
+ uint32_t i; for(i = 0; i < card; i++) {
// elems may not be aligned, read with memcpy
uint32_t elem;
memcpy(&elem, elems + i, sizeof(elem));
roaring_bitmap_add_bulk(bitmap, &context, elem);
}
return bitmap;
+
} else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
return roaring_bitmap_portable_deserialize(bufaschar + 1);
} else
return (NULL);
}
+roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
+                                                  size_t maxbytes) {
+    // Bounds-checked variant of roaring_bitmap_deserialize(): `maxbytes` is
+    // the number of readable bytes at `buf`. Returns NULL when the buffer is
+    // too short for the array format, the leading tag byte is unknown, or
+    // roaring_bitmap_create() fails; otherwise returns the decoded bitmap.
+    if (maxbytes < 1) return NULL;
+
+    const char *bufaschar = (const char *)buf;
+    if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
+        // Layout: 1 tag byte, a uint32_t count, then `count` uint32_t values.
+        const size_t header = 1 + sizeof(uint32_t);
+        if (maxbytes < header) return NULL;
+
+        uint32_t card;
+        memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+
+        // Check the buffer is big enough to contain card uint32_t elements.
+        // Compare by division: `card * sizeof(uint32_t)` can wrap around
+        // where size_t is 32 bits wide and let a short buffer through.
+        if (card > (maxbytes - header) / sizeof(uint32_t)) return NULL;
+
+        const uint32_t *elems = (const uint32_t *)(bufaschar + header);
+
+        roaring_bitmap_t *bitmap = roaring_bitmap_create();
+        if (bitmap == NULL) {
+            return NULL;
+        }
+        roaring_bulk_context_t context; memset(&context, 0, sizeof(context));
+        uint32_t i; for(i = 0; i < card; i++) {
+            // elems may not be aligned, read with memcpy
+            uint32_t elem;
+            memcpy(&elem, elems + i, sizeof(elem));
+            roaring_bitmap_add_bulk(bitmap, &context, elem);
+        }
+        return bitmap;
+    } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
+        // The tag byte is consumed here, so one byte less remains.
+        return roaring_bitmap_portable_deserialize_safe(bufaschar + 1,
+                                                        maxbytes - 1);
+    } else
+        return (NULL);
+}
+
bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
void *ptr) {
const roaring_array_t *ra = &r->high_low_container;
- int i; for (i = 0; i < ra->size; ++i)
+ int i; for(i = 0; i < ra->size; ++i)
if (!container_iterate(ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16,
- iterator, ptr)) {
+ ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) {
return false;
}
return true;
@@ -16767,215 +20961,165 @@ bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
uint64_t high_bits, void *ptr) {
const roaring_array_t *ra = &r->high_low_container;
- int i; for (i = 0; i < ra->size; ++i)
- if (!container_iterate64(
- ra->containers[i], ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16, iterator,
- high_bits, ptr)) {
+ int i; for(i = 0; i < ra->size; ++i)
+ if (!container_iterate64(ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16, iterator,
+ high_bits, ptr)) {
return false;
}
return true;
}
/****
-* begin roaring_uint32_iterator_t
-*****/
-
-// Partially initializes the roaring iterator when it begins looking at
-// a new container.
-static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
- newit->in_container_index = 0;
- newit->run_index = 0;
+ * begin roaring_uint32_iterator_t
+ *****/
+
+/**
+ * Partially initializes the iterator. Leaves it in either state:
+ * 1. Invalid due to `has_value = false`, or
+ * 2. At a container, with the high bits set, `has_value = true`.
+ */
+CROARING_WARN_UNUSED static bool iter_new_container_partial_init(
+ roaring_uint32_iterator_t *newit) {
newit->current_value = 0;
if (newit->container_index >= newit->parent->high_low_container.size ||
newit->container_index < 0) {
newit->current_value = UINT32_MAX;
return (newit->has_value = false);
}
- // assume not empty
newit->has_value = true;
// we precompute container, typecode and highbits so that successive
// iterators do not have to grab them from odd memory locations
// and have to worry about the (easily predicted) container_unwrap_shared
// call.
newit->container =
- newit->parent->high_low_container.containers[newit->container_index];
+ newit->parent->high_low_container.containers[newit->container_index];
newit->typecode =
- newit->parent->high_low_container.typecodes[newit->container_index];
+ newit->parent->high_low_container.typecodes[newit->container_index];
newit->highbits =
- ((uint32_t)
- newit->parent->high_low_container.keys[newit->container_index])
- << 16;
+ ((uint32_t)
+ newit->parent->high_low_container.keys[newit->container_index])
+ << 16;
newit->container =
- container_unwrap_shared(newit->container, &(newit->typecode));
- return newit->has_value;
-}
-
-static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
- if (!iter_new_container_partial_init(newit))
- return newit->has_value;
-
- switch (newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(newit->container);
-
- uint32_t wordindex = 0;
- uint64_t word;
- while ((word = bc->words[wordindex]) == 0) {
- wordindex++; // advance
- }
- // here "word" is non-zero
- newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- newit->current_value = newit->highbits | newit->in_container_index;
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(newit->container);
- newit->current_value = newit->highbits | ac->array[0];
- break; }
-
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(newit->container);
- newit->current_value = newit->highbits | rc->runs[0].value;
- break; }
-
- default:
- // if this ever happens, bug!
- assert(false);
- } // switch (typecode)
+ container_unwrap_shared(newit->container, &(newit->typecode));
return true;
}
-static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
- if (!iter_new_container_partial_init(newit))
- return newit->has_value;
-
- switch(newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
- uint64_t word;
- const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
- while ((word = bitset_container->words[wordindex]) == 0)
- --wordindex;
-
- int num_leading_zeros = __builtin_clzll(word);
- newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
- newit->current_value = newit->highbits | newit->in_container_index;
- break;
- }
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t* array_container = (const array_container_t*)newit->container;
- newit->in_container_index = array_container->cardinality - 1;
- newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
- break;
- }
- case RUN_CONTAINER_TYPE: {
- const run_container_t* run_container = (const run_container_t*)newit->container;
- newit->run_index = run_container->n_runs - 1;
- const rle16_t* last_run = &run_container->runs[newit->run_index];
- newit->current_value = newit->highbits | (last_run->value + last_run->length);
- break;
- }
- default:
- // if this ever happens, bug!
- assert(false);
+/**
+ * Positions the iterator at the first value of the current container that the
+ * iterator points at, if available.
+ */
+CROARING_WARN_UNUSED static bool loadfirstvalue(
+ roaring_uint32_iterator_t *newit) {
+ if (iter_new_container_partial_init(newit)) {
+ uint16_t value = 0;
+ newit->container_it =
+ container_init_iterator(newit->container, newit->typecode, &value);
+ newit->current_value = newit->highbits | value;
}
- return true;
+ return newit->has_value;
}
-// prerequesite: the value should be in range of the container
-static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
- // Don't have to check return value because of prerequisite
- iter_new_container_partial_init(newit);
- uint16_t lb = val & 0xFFFF;
-
- switch (newit->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(newit->container);
- newit->in_container_index =
- bitset_container_index_equalorlarger(bc, lb);
- newit->current_value = newit->highbits | newit->in_container_index;
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(newit->container);
- newit->in_container_index =
- array_container_index_equalorlarger(ac, lb);
- newit->current_value =
- newit->highbits | ac->array[newit->in_container_index];
- break; }
-
- case RUN_CONTAINER_TYPE: {
- const run_container_t *rc = const_CAST_run(newit->container);
- newit->run_index = run_container_index_equalorlarger(rc, lb);
- if (rc->runs[newit->run_index].value <= lb) {
- newit->current_value = val;
- } else {
- newit->current_value =
- newit->highbits | rc->runs[newit->run_index].value;
- }
- break; }
-
- default:
- __builtin_unreachable();
+/**
+ * Positions the iterator at the last value of the current container that the
+ * iterator points at, if available.
+ */
+CROARING_WARN_UNUSED static bool loadlastvalue(
+ roaring_uint32_iterator_t *newit) {
+ if (iter_new_container_partial_init(newit)) {
+ uint16_t value = 0;
+ newit->container_it = container_init_iterator_last(
+ newit->container, newit->typecode, &value);
+ newit->current_value = newit->highbits | value;
}
+ return newit->has_value;
+}
+/**
+ * Positions the iterator at the smallest value that is larger than or equal to
+ * `val` within the current container that the iterator points at. Assumes such
+ * a value exists within the current container.
+ */
+CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal(
+ roaring_uint32_iterator_t *newit, uint32_t val) {
+ bool partial_init = iter_new_container_partial_init(newit);
+ assert(partial_init);
+ if (!partial_init) {
+ return false;
+ }
+ uint16_t value = 0;
+ newit->container_it =
+ container_init_iterator(newit->container, newit->typecode, &value);
+ bool found = container_iterator_lower_bound(
+ newit->container, newit->typecode, &newit->container_it, &value,
+ val & 0xFFFF);
+ assert(found);
+ if (!found) {
+ return false;
+ }
+ newit->current_value = newit->highbits | value;
return true;
}
-void roaring_init_iterator(const roaring_bitmap_t *r,
+void roaring_iterator_init(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit) {
newit->parent = r;
newit->container_index = 0;
newit->has_value = loadfirstvalue(newit);
}
-void roaring_init_iterator_last(const roaring_bitmap_t *r,
+void roaring_iterator_init_last(const roaring_bitmap_t *r,
roaring_uint32_iterator_t *newit) {
newit->parent = r;
newit->container_index = newit->parent->high_low_container.size - 1;
newit->has_value = loadlastvalue(newit);
}
-roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) {
+roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) {
roaring_uint32_iterator_t *newit =
- (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ (roaring_uint32_iterator_t *)roaring_malloc(
+ sizeof(roaring_uint32_iterator_t));
if (newit == NULL) return NULL;
- roaring_init_iterator(r, newit);
+ roaring_iterator_init(r, newit);
return newit;
}
-roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+/**
+ * Allocates a new iterator with roaring_malloc() and copies the complete
+ * state of `it` into it byte-for-byte, so the copy refers to the same parent
+ * bitmap and position as `it`.
+ *
+ * Returns the new iterator, or NULL if the allocation failed.
+ */
+roaring_uint32_iterator_t *roaring_uint32_iterator_copy(
const roaring_uint32_iterator_t *it) {
roaring_uint32_iterator_t *newit =
- (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ (roaring_uint32_iterator_t *)roaring_malloc(
+ sizeof(roaring_uint32_iterator_t));
+ // Same guard as roaring_iterator_create(): never memcpy into a failed
+ // allocation.
+ if (newit == NULL) return NULL;
memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
return newit;
}
-bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
+/**
+ * Moves the iterator to the first value that is larger than or equal to
+ * `val`.
+ *
+ * The container for the high 16 bits of `val` is looked up first. If it
+ * exists and its maximum is not below the low 16 bits of `val`, the search
+ * finishes inside that container; otherwise the iterator is placed on the
+ * first value of the following container.
+ *
+ * Returns the updated `it->has_value`.
+ */
+bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it,
+ uint32_t val) {
uint16_t hb = val >> 16;
- const int i = ra_get_index(& it->parent->high_low_container, hb);
+ const int i = ra_get_index(&it->parent->high_low_container, hb);
if (i >= 0) {
- uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
- uint16_t lb = val & 0xFFFF;
- if(lowvalue < lb ) {
- it->container_index = i+1; // will have to load first value of next container
- } else {// the value is necessarily within the range of the container
- it->container_index = i;
- it->has_value = loadfirstvalue_largeorequal(it, val);
- return it->has_value;
- }
+ // NOTE: despite its name, `lowvalue` holds the container's maximum.
+ uint32_t lowvalue =
+ container_maximum(it->parent->high_low_container.containers[i],
+ it->parent->high_low_container.typecodes[i]);
+ uint16_t lb = val & 0xFFFF;
+ if (lowvalue < lb) {
+ // will have to load first value of next container
+ it->container_index = i + 1;
+ } else {
+ // the value is necessarily within the range of the container
+ it->container_index = i;
+ it->has_value = loadfirstvalue_largeorequal(it, val);
+ return it->has_value;
+ }
} else {
- // there is no matching, so we are going for the next container
- it->container_index = -i-1;
+ // there is no matching, so we are going for the next container
+ // NOTE(review): presumably ra_get_index() encodes the insertion point
+ // as -(position) - 1 when the key is absent -- confirm.
+ it->container_index = -i - 1;
}
it->has_value = loadfirstvalue(it);
return it->has_value;
}
-
-bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
+bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) {
if (it->container_index >= it->parent->high_low_container.size) {
return (it->has_value = false);
}
@@ -16983,70 +21127,17 @@ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
it->container_index = 0;
return (it->has_value = loadfirstvalue(it));
}
-
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE: {
- const bitset_container_t *bc = const_CAST_bitset(it->container);
- it->in_container_index++;
-
- uint32_t wordindex = it->in_container_index / 64;
- if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
-
- uint64_t word = bc->words[wordindex] &
- (UINT64_MAX << (it->in_container_index % 64));
- // next part could be optimized/simplified
- while ((word == 0) &&
- (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
- wordindex++;
- word = bc->words[wordindex];
- }
- if (word != 0) {
- it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- it->current_value = it->highbits | it->in_container_index;
- return (it->has_value = true);
- }
- break; }
-
- case ARRAY_CONTAINER_TYPE: {
- const array_container_t *ac = const_CAST_array(it->container);
- it->in_container_index++;
- if (it->in_container_index < ac->cardinality) {
- it->current_value =
- it->highbits | ac->array[it->in_container_index];
- return (it->has_value = true);
- }
- break; }
-
- case RUN_CONTAINER_TYPE: {
- if(it->current_value == UINT32_MAX) { // avoid overflow to zero
- return (it->has_value = false);
- }
-
- const run_container_t* rc = const_CAST_run(it->container);
- uint32_t limit = (it->highbits | (rc->runs[it->run_index].value +
- rc->runs[it->run_index].length));
- if (++it->current_value <= limit) {
- return (it->has_value = true);
- }
-
- if (++it->run_index < rc->n_runs) { // Assume the run has a value
- it->current_value =
- it->highbits | rc->runs[it->run_index].value;
- return (it->has_value = true);
- }
- break;
- }
-
- default:
- __builtin_unreachable();
+ uint16_t low16 = (uint16_t)it->current_value;
+ if (container_iterator_next(it->container, it->typecode, &it->container_it,
+ &low16)) {
+ it->current_value = it->highbits | low16;
+ return (it->has_value = true);
}
-
- // moving to next container
it->container_index++;
return (it->has_value = loadfirstvalue(it));
}
-bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
+bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) {
if (it->container_index < 0) {
return (it->has_value = false);
}
@@ -17054,151 +21145,46 @@ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
it->container_index = it->parent->high_low_container.size - 1;
return (it->has_value = loadlastvalue(it));
}
-
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE: {
- if (--it->in_container_index < 0)
- break;
-
- const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
- int32_t wordindex = it->in_container_index / 64;
- uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
-
- while (word == 0 && --wordindex >= 0) {
- word = bitset_container->words[wordindex];
- }
- if (word == 0)
- break;
-
- int num_leading_zeros = __builtin_clzll(word);
- it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
- it->current_value = it->highbits | it->in_container_index;
- return (it->has_value = true);
- }
- case ARRAY_CONTAINER_TYPE: {
- if (--it->in_container_index < 0)
- break;
-
- const array_container_t* array_container = (const array_container_t*)it->container;
- it->current_value = it->highbits | array_container->array[it->in_container_index];
- return (it->has_value = true);
- }
- case RUN_CONTAINER_TYPE: {
- if(it->current_value == 0)
- return (it->has_value = false);
-
- const run_container_t* run_container = (const run_container_t*)it->container;
- if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
- return (it->has_value = true);
- }
-
- if (--it->run_index < 0)
- break;
-
- it->current_value = it->highbits | (run_container->runs[it->run_index].value +
- run_container->runs[it->run_index].length);
- return (it->has_value = true);
- }
- default:
- // if this ever happens, bug!
- assert(false);
- } // switch (typecode)
-
- // moving to previous container
+ uint16_t low16 = (uint16_t)it->current_value;
+ if (container_iterator_prev(it->container, it->typecode, &it->container_it,
+ &low16)) {
+ it->current_value = it->highbits | low16;
+ return (it->has_value = true);
+ }
it->container_index--;
return (it->has_value = loadlastvalue(it));
}
-uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
- uint32_t ret = 0;
- uint32_t num_values;
- uint32_t wordindex; // used for bitsets
- uint64_t word; // used for bitsets
- const array_container_t* acont; //TODO remove
- const run_container_t* rcont; //TODO remove
- const bitset_container_t* bcont; //TODO remove
-
- while (it->has_value && ret < count) {
- switch (it->typecode) {
- case BITSET_CONTAINER_TYPE:
- bcont = const_CAST_bitset(it->container);
- wordindex = it->in_container_index / 64;
- word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
- do {
- while (word != 0 && ret < count) {
- buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
- word = word & (word - 1);
- buf++;
- ret++;
- }
- while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
- wordindex++;
- word = bcont->words[wordindex];
- }
- } while (word != 0 && ret < count);
- it->has_value = (word != 0);
- if (it->has_value) {
- it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
- it->current_value = it->highbits | it->in_container_index;
- }
- break;
- case ARRAY_CONTAINER_TYPE:
- acont = const_CAST_array(it->container);
- num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
- uint32_t i; for ( i = 0; i < num_values; i++) {
- buf[i] = it->highbits | acont->array[it->in_container_index + i];
- }
- buf += num_values;
- ret += num_values;
- it->in_container_index += num_values;
- it->has_value = (it->in_container_index < acont->cardinality);
- if (it->has_value) {
- it->current_value = it->highbits | acont->array[it->in_container_index];
- }
- break;
- case RUN_CONTAINER_TYPE:
- rcont = const_CAST_run(it->container);
- //"in_run_index" name is misleading, read it as "max_value_in_current_run"
- do {
- uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
- num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
- uint32_t i; for ( i = 0; i < num_values; i++) {
- buf[i] = it->current_value + i;
- }
- it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
- buf += num_values;
- ret += num_values;
-
- if (it->current_value > largest_run_value || it->current_value == 0) {
- it->run_index++;
- if (it->run_index < rcont->n_runs) {
- it->current_value = it->highbits | rcont->runs[it->run_index].value;
- } else {
- it->has_value = false;
- }
- }
- } while ((ret < count) && it->has_value);
- break;
- default:
- assert(false);
- }
- if (it->has_value) {
- assert(ret == count);
- return ret;
+/**
+ * Reads up to `count` values from the iterator into `buf`, advancing the
+ * iterator past the values read.
+ *
+ * Values are pulled container by container through
+ * container_iterator_read_into_uint32(). When the bitmap runs out of values
+ * first, fewer than `count` values are written and `it->has_value` ends up
+ * false.
+ *
+ * Returns the number of values written to `buf`.
+ */
+uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
+ uint32_t *buf, uint32_t count) {
+ uint32_t ret = 0;
+ while (it->has_value && ret < count) {
+ uint32_t consumed;
+ uint16_t low16 = (uint16_t)it->current_value;
+ bool has_value = container_iterator_read_into_uint32(
+ it->container, it->typecode, &it->container_it, it->highbits, buf,
+ count - ret, &consumed, &low16);
+ ret += consumed;
+ buf += consumed;
+ if (has_value) {
+ // The current container still has values left, so the read can only
+ // have stopped because the buffer is full.
+ it->has_value = true;
+ it->current_value = it->highbits | low16;
+ assert(ret == count);
+ return ret;
+ }
+ // Current container exhausted: continue with the next one.
+ it->container_index++;
+ it->has_value = loadfirstvalue(it);
}
- it->container_index++;
- it->has_value = loadfirstvalue(it);
- }
- return ret;
+ return ret;
}
-
-
-void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); }
+/**
+ * Releases the iterator `it` by passing it to roaring_free().
+ */
+void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) {
+ roaring_free(it);
+}
/****
-* end of roaring_uint32_iterator_t
-*****/
+ * end of roaring_uint32_iterator_t
+ *****/
bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2) {
@@ -17208,16 +21194,14 @@ bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
if (ra1->size != ra2->size) {
return false;
}
- int i; for (i = 0; i < ra1->size; ++i) {
+ int i; for(i = 0; i < ra1->size; ++i) {
if (ra1->keys[i] != ra2->keys[i]) {
return false;
}
}
- for (i = 0; i < ra1->size; ++i) {
- bool areequal = container_equals(ra1->containers[i],
- ra1->typecodes[i],
- ra2->containers[i],
- ra2->typecodes[i]);
+ for(i = 0; i < ra1->size; ++i) {
+ bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i],
+ ra2->containers[i], ra2->typecodes[i]);
if (!areequal) {
return false;
}
@@ -17230,21 +21214,21 @@ bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
const roaring_array_t *ra1 = &r1->high_low_container;
const roaring_array_t *ra2 = &r2->high_low_container;
- const int length1 = ra1->size,
- length2 = ra2->size;
+ const int length1 = ra1->size, length2 = ra2->size;
int pos1 = 0, pos2 = 0;
while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(ra1, pos1);
- const uint16_t s2 = ra_get_key_at_index(ra2, pos2);
+ const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1);
+ const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2);
if (s1 == s2) {
uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2);
- if (!container_is_subset(c1, type1, c2, type2))
- return false;
+ container_t *c1 =
+ ra_get_container_at_index(ra1, (uint16_t)pos1, &type1);
+ container_t *c2 =
+ ra_get_container_at_index(ra2, (uint16_t)pos2, &type2);
+ if (!container_is_subset(c1, type1, c2, type2)) return false;
++pos1;
++pos2;
} else if (s1 < s2) { // s1 < s2
@@ -17268,7 +21252,7 @@ static void insert_flipped_container(roaring_array_t *ans_arr,
container_t *flipped_container = NULL;
if (i >= 0) {
container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
+ ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
flipped_container =
container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
(uint32_t)(lb_end + 1), &ctype_out);
@@ -17294,7 +21278,7 @@ static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
container_t *flipped_container = NULL;
if (i >= 0) {
container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
+ ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
flipped_container = container_inot_range(
container_to_flip, ctype_in, (uint32_t)lb_start,
(uint32_t)(lb_end + 1), &ctype_out);
@@ -17323,7 +21307,7 @@ static void insert_fully_flipped_container(roaring_array_t *ans_arr,
container_t *flipped_container = NULL;
if (i >= 0) {
container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
+ ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
flipped_container =
container_not(container_to_flip, ctype_in, &ctype_out);
if (container_get_cardinality(flipped_container, ctype_out))
@@ -17345,7 +21329,7 @@ static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
container_t *flipped_container = NULL;
if (i >= 0) {
container_t *container_to_flip =
- ra_get_container_at_index(x1_arr, i, &ctype_in);
+ ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
flipped_container =
container_inot(container_to_flip, ctype_in, &ctype_out);
@@ -17369,7 +21353,7 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
if (range_start >= range_end) {
return roaring_bitmap_copy(x1);
}
- if(range_end >= UINT64_C(0x100000000)) {
+ if (range_end >= UINT64_C(0x100000000)) {
range_end = UINT64_C(0x100000000);
}
@@ -17399,9 +21383,10 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
- uint32_t hb; for ( hb = hb_start; hb <= hb_end; ++hb) {
+ uint32_t hb; for(hb = hb_start; hb <= hb_end; ++hb) {
insert_fully_flipped_container(&ans->high_low_container,
- &x1->high_low_container, hb);
+ &x1->high_low_container,
+ (uint16_t)hb);
}
// handle a partial final container
@@ -17422,7 +21407,7 @@ void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
if (range_start >= range_end) {
return; // empty range
}
- if(range_end >= UINT64_C(0x100000000)) {
+ if (range_end >= UINT64_C(0x100000000)) {
range_end = UINT64_C(0x100000000);
}
@@ -17445,8 +21430,8 @@ void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
if (lb_end != 0xFFFF) --hb_end;
- uint32_t hb; for ( hb = hb_start; hb <= hb_end; ++hb) {
- inplace_fully_flip_container(&x1->high_low_container, hb);
+ uint32_t hb; for(hb = hb_start; hb <= hb_end; ++hb) {
+ inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb);
}
// handle a partial final container
if (lb_end != 0xFFFF) {
@@ -17457,11 +21442,12 @@ void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
}
}
-static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) {
+static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c,
+ uint8_t t) {
int size = ra_get_size(ra);
- if (size == 0 || ra_get_key_at_index(ra, size-1) != k) {
+ if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) {
// No merge.
- ra_append(ra, k, c, t);
+ ra_append(ra, (uint16_t)k, c, t);
return;
}
@@ -17471,13 +21457,13 @@ static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c,
// NOTE: we don't need to unwrap here, since we added last_c ourselves
// we have the certainty it's not a shared container.
// The same applies to c, as it's the result of calling container_offset.
- last_c = ra_get_container_at_index(ra, size-1, &last_t);
+ last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t);
new_c = container_ior(last_c, last_t, c, t, &new_t);
- ra_set_container_at_index(ra, size-1, new_c, new_t);
+ ra_set_container_at_index(ra, size - 1, new_c, new_t);
- // Comparison of pointers of different origin is UB (or so claim some compiler
- // makers), so we compare their bit representation only.
+ // Comparison of pointers of different origin is UB (or so claim some
+ // compiler makers), so we compare their bit representation only.
if ((uintptr_t)last_c != (uintptr_t)new_c) {
container_free(last_c, last_t);
}
@@ -17507,25 +21493,24 @@ roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
in_offset = (uint16_t)(offset - container_offset * (1 << 16));
answer = roaring_bitmap_create();
- roaring_bitmap_set_copy_on_write(answer, is_cow(bm));
+ bool cow = is_cow(bm);
+ roaring_bitmap_set_copy_on_write(answer, cow);
ans_ra = &answer->high_low_container;
if (in_offset == 0) {
ans_ra = &answer->high_low_container;
- int i, j; for (i = 0, j = 0; i < length; ++i) {
- int64_t key = ra_get_key_at_index(bm_ra, i);
+ int i, j; for(i = 0, j = 0; i < length; ++i) {
+ int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i);
key += container_offset;
if (key < 0 || key >= (1 << 16)) {
continue;
}
-
- ra_append_copy(ans_ra, bm_ra, i, false);
- ans_ra->keys[j++] = key;
+ ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow);
+ ans_ra->keys[j++] = (uint16_t)key;
}
-
return answer;
}
@@ -17534,33 +21519,37 @@ roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
container_t *lo, *hi, **lo_ptr, **hi_ptr;
int64_t k;
- int i; for (i = 0; i < length; ++i) {
+ int i; for(i = 0; i < length; ++i) {
lo = hi = NULL;
lo_ptr = hi_ptr = NULL;
- k = ra_get_key_at_index(bm_ra, i)+container_offset;
+ k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset;
if (k >= 0 && k < (1 << 16)) {
lo_ptr = &lo;
}
- if (k+1 >= 0 && k+1 < (1 << 16)) {
+ if (k + 1 >= 0 && k + 1 < (1 << 16)) {
hi_ptr = &hi;
}
if (lo_ptr == NULL && hi_ptr == NULL) {
continue;
}
-
- c = ra_get_container_at_index(bm_ra, i, &t);
+ c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t);
c = container_unwrap_shared(c, &t);
container_add_offset(c, t, lo_ptr, hi_ptr, in_offset);
if (lo != NULL) {
- offset_append_with_merge(ans_ra, k, lo, t);
+ offset_append_with_merge(ans_ra, (int)k, lo, t);
}
if (hi != NULL) {
- ra_append(ans_ra, k+1, hi, t);
+ ra_append(ans_ra, (uint16_t)(k + 1), hi, t);
}
+ // The `lo` and `hi` containers always keep the same type as container `c`.
+ // When `container_add_offset` is applied to a bitset container, `lo` and
+ // `hi` may have a small cardinality, so they must be repaired into array
+ // containers.
}
+ roaring_bitmap_repair_after_lazy(answer); // do required type conversions.
return answer;
}
@@ -17581,25 +21570,23 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
container_t *c;
if (bitsetconversion &&
(get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
- (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)
- ){
+ (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) {
container_t *newc1 =
container_mutable_unwrap_shared(c1, &type1);
newc1 = container_to_bitset(newc1, type1);
type1 = BITSET_CONTAINER_TYPE;
- c = container_lazy_ior(newc1, type1, c2, type2,
- &result_type);
+ c = container_lazy_ior(newc1, type1, c2, type2, &result_type);
if (c != newc1) { // should not happen
container_free(newc1, type1);
}
@@ -17615,12 +21602,12 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
c1 = get_copy_of_container(c1, &type1, is_cow(x1));
if (is_cow(x1)) {
ra_set_container_at_index(&x1->high_low_container, pos1, c1,
@@ -17629,11 +21616,11 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s1, c1, type1);
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
ra_set_container_at_index(&x2->high_low_container, pos2, c2,
@@ -17642,7 +21629,7 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s2, c2, type2);
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -17672,16 +21659,15 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
}
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
if (!container_is_full(c1, type1)) {
if ((bitsetconversion == false) ||
- (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)
- ){
+ (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) {
c1 = get_writable_copy_if_shared(c1, &type1);
} else {
// convert to bitset
@@ -17694,9 +21680,9 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
}
container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_lazy_ior(c1, type1, c2, type2,
- &result_type);
+ &x2->high_low_container, (uint16_t)pos2, &type2);
+ container_t *c =
+ container_lazy_ior(c1, type1, c2, type2, &result_type);
if (c != c1) { // in this instance a new container was created,
// and we need to free the old one
@@ -17710,17 +21696,17 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
// container_t *c2_clone = container_clone(c2, type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
@@ -17733,7 +21719,7 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
length1++;
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -17758,16 +21744,16 @@ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- container_t *c = container_lazy_xor(
- c1, type1, c2, type2, &result_type);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
+ container_t *c =
+ container_lazy_xor(c1, type1, c2, type2, &result_type);
if (container_nonzero_cardinality(c, result_type)) {
ra_append(&answer->high_low_container, s1, c, result_type);
@@ -17779,12 +21765,12 @@ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
c1 = get_copy_of_container(c1, &type1, is_cow(x1));
if (is_cow(x1)) {
ra_set_container_at_index(&x1->high_low_container, pos1, c1,
@@ -17793,11 +21779,11 @@ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s1, c1, type1);
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
ra_set_container_at_index(&x2->high_low_container, pos2, c2,
@@ -17806,7 +21792,7 @@ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
ra_append(&answer->high_low_container, s2, c2, type2);
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -17836,27 +21822,26 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
}
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
- // We do the computation "in place" only when c1 is not a shared container.
- // Rationale: using a shared container safely with in place computation would
- // require making a copy and then doing the computation in place which is likely
- // less efficient than avoiding in place entirely and always generating a new
- // container.
+ // We do the computation "in place" only when c1 is not a shared
+ // container. Rationale: using a shared container safely with in
+ // place computation would require making a copy and then doing the
+ // computation in place which is likely less efficient than avoiding
+ // in place entirely and always generating a new container.
container_t *c;
if (type1 == SHARED_CONTAINER_TYPE) {
c = container_lazy_xor(c1, type1, c2, type2, &result_type);
shared_container_free(CAST_shared(c1)); // release
- }
- else {
+ } else {
c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
}
@@ -17872,17 +21857,17 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
// container_t *c2_clone = container_clone(c2, type2);
c2 = get_copy_of_container(c2, &type2, is_cow(x2));
if (is_cow(x2)) {
@@ -17895,7 +21880,7 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
length1++;
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -17907,7 +21892,7 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
roaring_array_t *ra = &r->high_low_container;
- int i; for (i = 0; i < ra->size; ++i) {
+ int i; for(i = 0; i < ra->size; ++i) {
const uint8_t old_type = ra->typecodes[i];
container_t *old_c = ra->containers[i];
uint8_t new_type = old_type;
@@ -17917,16 +21902,14 @@ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
}
}
-
-
/**
-* roaring_bitmap_rank returns the number of integers that are smaller or equal
-* to x.
-*/
+ * roaring_bitmap_rank returns the number of integers that are smaller or equal
+ * to x.
+ */
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
uint64_t size = 0;
uint32_t xhigh = x >> 16;
- int i; for (i = 0; i < bm->high_low_container.size; i++) {
+ int i; for(i = 0; i < bm->high_low_container.size; i++) {
uint32_t key = bm->high_low_container.keys[i];
if (xhigh > key) {
size +=
@@ -17942,11 +21925,66 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
}
return size;
}
+/**
+ * roaring_bitmap_rank_many writes, for every value in [begin, end), the
+ * number of integers in bm that are smaller or equal to that value.
+ *
+ * The container cursor below only ever moves forward, so the queried values
+ * are expected to be in ascending order. `ans` must have room for
+ * (end - begin) results.
+ */
+void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin,
+                              const uint32_t *end, uint64_t *ans) {
+    // Cardinality of all containers strictly before container `i`.
+    uint64_t size = 0;
+
+    int i = 0;
+    const uint32_t *iter = begin;
+    while (i < bm->high_low_container.size && iter != end) {
+        uint32_t x = *iter;
+        uint32_t xhigh = x >> 16;
+        uint32_t key = bm->high_low_container.keys[i];
+        if (xhigh > key) {
+            // The whole container lies below x: count it and move on.
+            size +=
+                container_get_cardinality(bm->high_low_container.containers[i],
+                                          bm->high_low_container.typecodes[i]);
+            i++;
+        } else if (xhigh == key) {
+            // The container handles the queries that fall inside it; the
+            // returned count is used to advance both cursors together.
+            uint32_t consumed = container_rank_many(
+                bm->high_low_container.containers[i],
+                bm->high_low_container.typecodes[i], size, iter, end, ans);
+            iter += consumed;
+            ans += consumed;
+        } else {
+            // x falls in a gap before container i: only earlier containers
+            // contribute to its rank.
+            *(ans++) = size;
+            iter++;
+        }
+    }
+    // All containers have been consumed (or the bitmap is empty): every
+    // remaining query lies above the last key, so its rank is the total
+    // cardinality accumulated in `size`. Without this the remaining output
+    // slots would be left unwritten.
+    while (iter != end) {
+        *(ans++) = size;
+        iter++;
+    }
+}
/**
-* roaring_bitmap_smallest returns the smallest value in the set.
-* Returns UINT32_MAX if the set is empty.
-*/
+ * roaring_bitmap_get_index returns the index of x, or -1 if x does not exist.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) {
+    const roaring_array_t *ra = &bm->high_low_container;
+    const uint16_t xhigh = x >> 16;
+
+    // Locate the container holding the high 16 bits; absent key => absent x.
+    const int32_t high_idx = ra_get_index(ra, xhigh);
+    if (high_idx < 0) return -1;
+
+    // Number of values stored in the containers visited so far.
+    int64_t preceding = 0;
+    int pos = 0;
+    while (pos < ra->size) {
+        const uint32_t key = ra->keys[pos];
+        if (xhigh < key) {
+            return -1;
+        }
+        if (xhigh == key) {
+            // Position of the low 16 bits inside the matching container.
+            const int32_t low_idx =
+                container_get_index(ra->containers[high_idx],
+                                    ra->typecodes[high_idx], x & 0xFFFF);
+            if (low_idx < 0) return -1;
+            return preceding + low_idx;
+        }
+        // Container lies entirely below x: all of its values precede x.
+        preceding +=
+            container_get_cardinality(ra->containers[pos], ra->typecodes[pos]);
+        pos++;
+    }
+    return preceding;
+}
+
+/**
+ * roaring_bitmap_minimum returns the smallest value in the set.
+ * Returns UINT32_MAX if the set is empty.
+ */
uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
if (bm->high_low_container.size > 0) {
container_t *c = bm->high_low_container.containers[0];
@@ -17959,9 +21997,9 @@ uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
}
/**
-* roaring_bitmap_smallest returns the greatest value in the set.
-* Returns 0 if the set is empty.
-*/
+ * roaring_bitmap_maximum returns the greatest value in the set.
+ * Returns 0 if the set is empty.
+ */
uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
if (bm->high_low_container.size > 0) {
container_t *container =
@@ -18001,44 +22039,45 @@ bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
}
bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
- const roaring_bitmap_t *x2) {
+ const roaring_bitmap_t *x2) {
const int length1 = x1->high_low_container.size,
length2 = x2->high_low_container.size;
uint64_t answer = 0;
int pos1 = 0, pos2 = 0;
while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
+ const uint16_t s1 =
+ ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ const uint16_t s2 =
+ ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
if (s1 == s2) {
uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
- if (container_intersect(c1, type1, c2, type2))
- return true;
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
+ if (container_intersect(c1, type1, c2, type2)) return true;
++pos1;
++pos2;
} else if (s1 < s2) { // s1 < s2
- pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
} else { // s1 > s2
- pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
}
}
return answer != 0;
}
-bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
- uint64_t x, uint64_t y) {
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x,
+ uint64_t y) {
if (x >= y) {
// Empty range.
return false;
}
roaring_uint32_iterator_t it;
- roaring_init_iterator(bm, &it);
- if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) {
+ roaring_iterator_init(bm, &it);
+ if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) {
// No values above x.
return false;
}
@@ -18049,24 +22088,24 @@ bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
return true;
}
-
uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
const roaring_bitmap_t *x2) {
const int length1 = x1->high_low_container.size,
length2 = x2->high_low_container.size;
uint64_t answer = 0;
int pos1 = 0, pos2 = 0;
-
while (pos1 < length1 && pos2 < length2) {
- const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ const uint16_t s1 =
+ ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ const uint16_t s2 =
+ ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
if (s1 == s2) {
uint8_t type1, type2;
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
answer += container_and_cardinality(c1, type1, c2, type2);
++pos1;
++pos2;
@@ -18110,7 +22149,6 @@ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
return c1 + c2 - 2 * inter;
}
-
bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
const uint16_t hb = val >> 16;
/*
@@ -18121,60 +22159,63 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
uint8_t typecode;
// next call ought to be cheap
- container_t *container =
- ra_get_container_at_index(&r->high_low_container, i, &typecode);
- // rest might be a tad expensive, possibly involving another round of binary search
+ container_t *container = ra_get_container_at_index(&r->high_low_container,
+ (uint16_t)i, &typecode);
+ // rest might be a tad expensive, possibly involving another round of binary
+ // search
return container_contains(container, val & 0xFFFF, typecode);
}
-
/**
- * Check whether a range of values from range_start (included) to range_end (excluded) is present
+ * Check whether a range of values from range_start (included) to range_end
+ * (excluded) is present
*/
-bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
- if(range_end >= UINT64_C(0x100000000)) {
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
+ uint64_t range_start, uint64_t range_end) {
+ if (range_end >= UINT64_C(0x100000000)) {
range_end = UINT64_C(0x100000000);
}
- if (range_start >= range_end) return true; // empty range are always contained!
- if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
+ if (range_start >= range_end)
+ return true; // empty range are always contained!
+ if (range_end - range_start == 1)
+ return roaring_bitmap_contains(r, (uint32_t)range_start);
uint16_t hb_rs = (uint16_t)(range_start >> 16);
uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
const int32_t span = hb_re - hb_rs;
const int32_t hlc_sz = ra_get_size(&r->high_low_container);
if (hlc_sz < span + 1) {
- return false;
+ return false;
}
int32_t is = ra_get_index(&r->high_low_container, hb_rs);
int32_t ie = ra_get_index(&r->high_low_container, hb_re);
- ie = (ie < 0 ? -ie - 1 : ie);
- if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
- return false;
+ if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+ return false;
}
const uint32_t lb_rs = range_start & 0xFFFF;
const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
uint8_t type;
- container_t *c = ra_get_container_at_index(&r->high_low_container, is,
- &type);
+ container_t *c =
+ ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type);
if (hb_rs == hb_re) {
- return container_contains_range(c, lb_rs, lb_re, type);
+ return container_contains_range(c, lb_rs, lb_re, type);
}
if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
- return false;
+ return false;
}
- c = ra_get_container_at_index(&r->high_low_container, ie, &type);
+ c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type);
if (!container_contains_range(c, 0, lb_re, type)) {
return false;
}
- int32_t i; for (i = is + 1; i < ie; ++i) {
- c = ra_get_container_at_index(&r->high_low_container, i, &type);
- if (!container_is_full(c, type) ) {
- return false;
+ int32_t i; for(i = is + 1; i < ie; ++i) {
+ c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i,
+ &type);
+ if (!container_is_full(c, type)) {
+ return false;
}
}
return true;
}
-
bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
const roaring_bitmap_t *r2) {
return (roaring_bitmap_get_cardinality(r2) >
@@ -18182,40 +22223,38 @@ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
roaring_bitmap_is_subset(r1, r2));
}
-
/*
* FROZEN SERIALIZATION FORMAT DESCRIPTION
*
* -- (beginning must be aligned by 32 bytes) --
- * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
- * <run_data> rle16_t[total number of rle elements in all run containers]
- * <array_data> uint16_t[total number of array elements in all array containers]
- * <keys> uint16_t[num_containers]
- * <counts> uint16_t[num_containers]
- * <typecodes> uint8_t[num_containers]
- * <header> uint32_t
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS *
+ *               num_bitset_containers]
+ * <run_data>    rle16_t[total number of rle elements in all run containers]
+ * <array_data>  uint16_t[total number of array elements in all array
+ *               containers]
+ * <keys>        uint16_t[num_containers]
+ * <counts>      uint16_t[num_containers]
+ * <typecodes>   uint8_t[num_containers]
+ * <header>      uint32_t
*
* <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
* and the number of containers (17 bits).
*
* <counts> stores number of elements for every container.
* Its meaning depends on container type.
- * For array and bitset containers, this value is the container cardinality minus one.
- * For run container, it is the number of rle_t elements (n_runs).
+ * For array and bitset containers, this value is the container cardinality
+ * minus one. For run container, it is the number of rle_t elements (n_runs).
*
* <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
* all containers of respective type.
*
* <*_data> and <keys> are kept close together because they are not accessed
* during deserialization. This may reduce IO in case of large mmapped bitmaps.
- * All members have their native alignments during deserilization except <header>,
- * which is not guaranteed to be aligned by 4 bytes.
+ * All members have their native alignments during deserialization except
+ * <header>, which is not guaranteed to be aligned by 4 bytes.
*/
size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
const roaring_array_t *ra = &rb->high_low_container;
size_t num_bytes = 0;
- int32_t i; for (i = 0; i < ra->size; i++) {
+ int32_t i; for(i = 0; i < ra->size; i++) {
switch (ra->typecodes[i]) {
case BITSET_CONTAINER_TYPE: {
num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
@@ -18228,16 +22267,16 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
}
case ARRAY_CONTAINER_TYPE: {
const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
+ const_CAST_array(ra->containers[i]);
num_bytes += ac->cardinality * sizeof(uint16_t);
break;
}
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
}
- num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
- num_bytes += 4; // header
+ num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
+ num_bytes += 4; // header
return num_bytes;
}
@@ -18258,11 +22297,11 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
size_t bitset_zone_size = 0;
size_t run_zone_size = 0;
size_t array_zone_size = 0;
- int32_t i; for (i = 0; i < ra->size; i++) {
+ int32_t i; for(i = 0; i < ra->size; i++) {
switch (ra->typecodes[i]) {
case BITSET_CONTAINER_TYPE: {
bitset_zone_size +=
- BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
break;
}
case RUN_CONTAINER_TYPE: {
@@ -18272,36 +22311,38 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
}
case ARRAY_CONTAINER_TYPE: {
const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
+ const_CAST_array(ra->containers[i]);
array_zone_size += ac->cardinality * sizeof(uint16_t);
break;
}
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
}
uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
- uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
- uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size);
+ uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size);
uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
- for (i = 0; i < ra->size; i++) {
+ for(i = 0; i < ra->size; i++) {
uint16_t count;
switch (ra->typecodes[i]) {
case BITSET_CONTAINER_TYPE: {
const bitset_container_t *bc =
- const_CAST_bitset(ra->containers[i]);
+ const_CAST_bitset(ra->containers[i]);
memcpy(bitset_zone, bc->words,
BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
- count = bc->cardinality - 1;
+ count = (uint16_t)(bc->cardinality - 1);
} else {
- count = bitset_container_compute_cardinality(bc) - 1;
+ count =
+ (uint16_t)(bitset_container_compute_cardinality(bc) -
+ 1);
}
break;
}
@@ -18310,20 +22351,20 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
size_t num_bytes = rc->n_runs * sizeof(rle16_t);
memcpy(run_zone, rc->runs, num_bytes);
run_zone += rc->n_runs;
- count = rc->n_runs;
+ count = (uint16_t)rc->n_runs;
break;
}
case ARRAY_CONTAINER_TYPE: {
const array_container_t *ac =
- const_CAST_array(ra->containers[i]);
+ const_CAST_array(ra->containers[i]);
size_t num_bytes = ac->cardinality * sizeof(uint16_t);
memcpy(array_zone, ac->array, num_bytes);
array_zone += ac->cardinality;
- count = ac->cardinality - 1;
+ count = (uint16_t)(ac->cardinality - 1);
break;
}
default:
- __builtin_unreachable();
+ roaring_unreachable;
}
memcpy(&count_zone[i], &count, 2);
}
@@ -18333,8 +22374,8 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
memcpy(header_zone, &header, 4);
}
-const roaring_bitmap_t *
-roaring_bitmap_frozen_view(const char *buf, size_t length) {
+const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
+ size_t length) {
if ((uintptr_t)buf % 32 != 0) {
return NULL;
}
@@ -18344,7 +22385,7 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
return NULL;
}
uint32_t header;
- memcpy(&header, buf + length - 4, 4); // header may be misaligned
+ memcpy(&header, buf + length - 4, 4); // header may be misaligned
if ((header & 0x7FFF) != FROZEN_COOKIE) {
return NULL;
}
@@ -18365,11 +22406,12 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
size_t bitset_zone_size = 0;
size_t run_zone_size = 0;
size_t array_zone_size = 0;
- int32_t i; for (i = 0; i < num_containers; i++) {
+ int32_t i; for(i = 0; i < num_containers; i++) {
switch (typecodes[i]) {
case BITSET_CONTAINER_TYPE:
num_bitset_containers++;
- bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ bitset_zone_size +=
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
break;
case RUN_CONTAINER_TYPE:
num_run_containers++;
@@ -18384,16 +22426,16 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
}
}
if (length != bitset_zone_size + run_zone_size + array_zone_size +
- 5 * num_containers + 4) {
+ 5 * num_containers + 4) {
return NULL;
}
- uint64_t *bitset_zone = (uint64_t*) (buf);
- rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
- uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
+ uint64_t *bitset_zone = (uint64_t *)(buf);
+ rle16_t *run_zone = (rle16_t *)(buf + bitset_zone_size);
+ uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size);
size_t alloc_size = 0;
alloc_size += sizeof(roaring_bitmap_t);
- alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_containers * sizeof(container_t *);
alloc_size += num_bitset_containers * sizeof(bitset_container_t);
alloc_size += num_run_containers * sizeof(run_container_t);
alloc_size += num_array_containers * sizeof(array_container_t);
@@ -18403,16 +22445,15 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
return NULL;
}
- roaring_bitmap_t *rb = (roaring_bitmap_t *)
- arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ roaring_bitmap_t *rb =
+ (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t));
rb->high_low_container.flags = ROARING_FLAG_FROZEN;
rb->high_low_container.allocation_size = num_containers;
rb->high_low_container.size = num_containers;
rb->high_low_container.keys = (uint16_t *)keys;
rb->high_low_container.typecodes = (uint8_t *)typecodes;
- rb->high_low_container.containers =
- (container_t **)arena_alloc(&arena,
- sizeof(container_t*) * num_containers);
+ rb->high_low_container.containers = (container_t **)arena_alloc(
+ &arena, sizeof(container_t *) * num_containers);
// Ensure offset of high_low_container.containers is known distance used in
// C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the
// only allocation that precedes high_low_container.containers. If this is
@@ -18421,11 +22462,11 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
assert(rb ==
(roaring_bitmap_t *)((char *)rb->high_low_container.containers -
sizeof(roaring_bitmap_t)));
- for (i = 0; i < num_containers; i++) {
+ for(i = 0; i < num_containers; i++) {
switch (typecodes[i]) {
case BITSET_CONTAINER_TYPE: {
- bitset_container_t *bitset = (bitset_container_t *)
- arena_alloc(&arena, sizeof(bitset_container_t));
+ bitset_container_t *bitset = (bitset_container_t *)arena_alloc(
+ &arena, sizeof(bitset_container_t));
bitset->words = bitset_zone;
bitset->cardinality = counts[i] + UINT32_C(1);
rb->high_low_container.containers[i] = bitset;
@@ -18433,8 +22474,8 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
break;
}
case RUN_CONTAINER_TYPE: {
- run_container_t *run = (run_container_t *)
- arena_alloc(&arena, sizeof(run_container_t));
+ run_container_t *run = (run_container_t *)arena_alloc(
+ &arena, sizeof(run_container_t));
run->capacity = counts[i];
run->n_runs = counts[i];
run->runs = run_zone;
@@ -18443,8 +22484,8 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
break;
}
case ARRAY_CONTAINER_TYPE: {
- array_container_t *array = (array_container_t *)
- arena_alloc(&arena, sizeof(array_container_t));
+ array_container_t *array = (array_container_t *)arena_alloc(
+ &arena, sizeof(array_container_t));
array->capacity = counts[i] + UINT32_C(1);
array->cardinality = counts[i] + UINT32_C(1);
array->array = array_zone;
@@ -18463,7 +22504,7 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
ALLOW_UNALIGNED
roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
- char *start_of_buf = (char *) buf;
+ char *start_of_buf = (char *)buf;
uint32_t cookie;
int32_t num_containers;
uint16_t *descriptive_headers;
@@ -18477,9 +22518,9 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
memcpy(&num_containers, buf, sizeof(int32_t));
buf += sizeof(int32_t);
- descriptive_headers = (uint16_t *) buf;
+ descriptive_headers = (uint16_t *)buf;
buf += num_containers * 2 * sizeof(uint16_t);
- offset_headers = (uint32_t *) buf;
+ offset_headers = (uint32_t *)buf;
buf += num_containers * sizeof(uint32_t);
} else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
num_containers = (cookie >> 16) + 1;
@@ -18487,10 +22528,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
int32_t run_flag_bitset_size = (num_containers + 7) / 8;
run_flag_bitset = buf;
buf += run_flag_bitset_size;
- descriptive_headers = (uint16_t *) buf;
+ descriptive_headers = (uint16_t *)buf;
buf += num_containers * 2 * sizeof(uint16_t);
- if(num_containers >= NO_OFFSET_THRESHOLD) {
- offset_headers = (uint32_t *) buf;
+ if (num_containers >= NO_OFFSET_THRESHOLD) {
+ offset_headers = (uint32_t *)buf;
buf += num_containers * sizeof(uint32_t);
}
} else {
@@ -18502,17 +22543,17 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
int32_t num_run_containers = 0;
int32_t num_array_containers = 0;
- int32_t i; for (i = 0; i < num_containers; i++) {
+ int32_t i; for(i = 0; i < num_containers; i++) {
uint16_t tmp;
- memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp));
uint32_t cardinality = tmp + 1;
bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
bool isrun = false;
- if(hasrun) {
- if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
+ if (hasrun) {
+ if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
}
if (isbitmap) {
@@ -18526,12 +22567,12 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
size_t alloc_size = 0;
alloc_size += sizeof(roaring_bitmap_t);
- alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_containers * sizeof(container_t *);
alloc_size += num_bitset_containers * sizeof(bitset_container_t);
alloc_size += num_run_containers * sizeof(run_container_t);
alloc_size += num_array_containers * sizeof(array_container_t);
- alloc_size += num_containers * sizeof(uint16_t); // keys
- alloc_size += num_containers * sizeof(uint8_t); // typecodes
+ alloc_size += num_containers * sizeof(uint16_t); // keys
+ alloc_size += num_containers * sizeof(uint8_t); // typecodes
// allocate bitmap and construct containers
char *arena = (char *)roaring_malloc(alloc_size);
@@ -18539,73 +22580,79 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
return NULL;
}
- roaring_bitmap_t *rb = (roaring_bitmap_t *)
- arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ roaring_bitmap_t *rb =
+ (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t));
rb->high_low_container.flags = ROARING_FLAG_FROZEN;
rb->high_low_container.allocation_size = num_containers;
rb->high_low_container.size = num_containers;
- rb->high_low_container.containers =
- (container_t **)arena_alloc(&arena,
- sizeof(container_t*) * num_containers);
+ rb->high_low_container.containers = (container_t **)arena_alloc(
+ &arena, sizeof(container_t *) * num_containers);
- uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
- uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
+ uint16_t *keys =
+ (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
+ uint8_t *typecodes =
+ (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
rb->high_low_container.keys = keys;
rb->high_low_container.typecodes = typecodes;
- for (i = 0; i < num_containers; i++) {
+ for(i = 0; i < num_containers; i++) {
uint16_t tmp;
- memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp));
int32_t cardinality = tmp + 1;
bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
bool isrun = false;
- if(hasrun) {
- if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
+ if (hasrun) {
+ if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
}
- keys[i] = descriptive_headers[2*i];
+ keys[i] = descriptive_headers[2 * i];
if (isbitmap) {
typecodes[i] = BITSET_CONTAINER_TYPE;
- bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t));
+ bitset_container_t *c = (bitset_container_t *)arena_alloc(
+ &arena, sizeof(bitset_container_t));
c->cardinality = cardinality;
- if(offset_headers != NULL) {
- c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
+ if (offset_headers != NULL) {
+ c->words = (uint64_t *)(start_of_buf + offset_headers[i]);
} else {
- c->words = (uint64_t *) buf;
+ c->words = (uint64_t *)buf;
buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
}
rb->high_low_container.containers[i] = c;
} else if (isrun) {
typecodes[i] = RUN_CONTAINER_TYPE;
- run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
+ run_container_t *c =
+ (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
c->capacity = cardinality;
uint16_t n_runs;
- if(offset_headers != NULL) {
- memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t));
+ if (offset_headers != NULL) {
+ memcpy(&n_runs, start_of_buf + offset_headers[i],
+ sizeof(uint16_t));
c->n_runs = n_runs;
- c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t));
+ c->runs = (rle16_t *)(start_of_buf + offset_headers[i] +
+ sizeof(uint16_t));
} else {
memcpy(&n_runs, buf, sizeof(uint16_t));
c->n_runs = n_runs;
buf += sizeof(uint16_t);
- c->runs = (rle16_t *) buf;
+ c->runs = (rle16_t *)buf;
buf += c->n_runs * sizeof(rle16_t);
}
rb->high_low_container.containers[i] = c;
} else {
typecodes[i] = ARRAY_CONTAINER_TYPE;
- array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t));
+ array_container_t *c = (array_container_t *)arena_alloc(
+ &arena, sizeof(array_container_t));
c->cardinality = cardinality;
c->capacity = cardinality;
- if(offset_headers != NULL) {
- c->array = (uint16_t *) (start_of_buf + offset_headers[i]);
+ if (offset_headers != NULL) {
+ c->array = (uint16_t *)(start_of_buf + offset_headers[i]);
} else {
- c->array = (uint16_t *) buf;
+ c->array = (uint16_t *)buf;
buf += cardinality * sizeof(uint16_t);
}
rb->high_low_container.containers[i] = c;
@@ -18615,22 +22662,2154 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
return rb;
}
+/**
+ * roaring_bitmap_to_bitset copies the content of r into bitset, resizing the
+ * bitset so that it can hold the largest value of r.
+ *
+ * Returns false (leaving the conversion undone) if the resize fails, true
+ * otherwise.
+ */
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) {
+    const roaring_array_t *ra = &r->high_low_container;
+    uint32_t max_value = roaring_bitmap_maximum(r);
+    // Bit `max_value` lives in word max_value / 64, so max_value / 64 + 1
+    // words are required. The previous (max_value + 63) / 64 was one word
+    // short whenever max_value was a multiple of 64 (including the bitmap
+    // {0}), which made the writes below run past the end of the array.
+    // An empty bitmap (maximum reported as 0) still needs no words at all.
+    size_t new_array_size =
+        (ra->size == 0) ? 0 : (size_t)(max_value / 64) + 1;
+    bool resize_ok = bitset_resize(bitset, new_array_size, true);
+    if (!resize_ok) {
+        return false;
+    }
+    int i;
+    for (i = 0; i < ra->size; ++i) {
+        // Each container covers 2^16 bits, i.e. 1024 64-bit words.
+        uint64_t *words = bitset->array + (ra->keys[i] << 10);
+        uint8_t type = ra->typecodes[i];
+        const container_t *c = ra->containers[i];
+        if (type == SHARED_CONTAINER_TYPE) {
+            c = container_unwrap_shared(c, &type);
+        }
+        switch (type) {
+            case BITSET_CONTAINER_TYPE: {
+                // Clamp the copy so the last container does not write past
+                // the end of the resized array.
+                size_t max_word_index = new_array_size - (ra->keys[i] << 10);
+                if (max_word_index > 1024) {
+                    max_word_index = 1024;
+                }
+                const bitset_container_t *src = const_CAST_bitset(c);
+                memcpy(words, src->words, max_word_index * sizeof(uint64_t));
+            } break;
+            case ARRAY_CONTAINER_TYPE: {
+                const array_container_t *src = const_CAST_array(c);
+                bitset_set_list(words, src->array, src->cardinality);
+            } break;
+            case RUN_CONTAINER_TYPE: {
+                const run_container_t *src = const_CAST_run(c);
+                int32_t rlepos;
+                for (rlepos = 0; rlepos < src->n_runs; ++rlepos) {
+                    rle16_t rle = src->runs[rlepos];
+                    bitset_set_lenrange(words, rle.value, rle.length);
+                }
+            } break;
+            default:
+                roaring_unreachable;
+        }
+    }
+    return true;
+}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring {
+}
+}
+} // extern "C" { namespace roaring {
#endif
/* end file src/roaring.c */
+/* begin file src/roaring64.c */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <string.h>
+
+
+// For serialization / deserialization
+// containers.h last to avoid conflict with ROARING_CONTAINER_T.
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" {
+namespace roaring {
+namespace api {
+#endif
+
+// TODO: Copy on write.
+// TODO: Error on failed allocation.
+
+// 64-bit roaring bitmap. Entries live in an ART (`art`) keyed by the high 48
+// bits of each value; the ART values are roaring64_leaf_t leaves.
+typedef struct roaring64_bitmap_s {
+ art_t art;
+ uint8_t flags; // Set to 0 by roaring64_bitmap_create().
+} roaring64_bitmap_t;
+
+// Leaf type of the ART used to keep the high 48 bits of each entry.
+typedef struct roaring64_leaf_s {
+ // NOTE(review): leaves are handed to the ART cast to `art_val_t *`;
+ // presumably this leading member exists to make that cast valid - confirm.
+ art_val_t _pad;
+ // Type tag passed alongside `container` to every container_* call.
+ uint8_t typecode;
+ // Container holding the low 16 bits of the entries under this key.
+ container_t *container;
+} roaring64_leaf_t;
+
+// Alias to make it easier to work with, since it's an internal-only type
+// anyway.
+typedef struct roaring64_leaf_s leaf_t;
+
+// Iterator struct to hold iteration state.
+typedef struct roaring64_iterator_s {
+ // Bitmap being iterated over.
+ const roaring64_bitmap_t *parent;
+ // Position within the ART (selects the current leaf / container).
+ art_iterator_t art_it;
+ // Position within the current leaf's container.
+ roaring_container_iterator_t container_it;
+ uint64_t high48; // Key that art_it points to.
+
+ // Current 64-bit value: high48 combined with the container's low 16 bits.
+ uint64_t value;
+ // True while the iterator points at a value.
+ bool has_value;
+
+ // If has_value is false, then the iterator is saturated. This field
+ // indicates the direction of saturation. If true, there are no more values
+ // in the forward direction. If false, there are no more values in the
+ // backward direction.
+ bool saturated_forward;
+} roaring64_iterator_t;
+
+// Decomposes a 64-bit key: the leading ART_KEY_BYTES bytes of its big-endian
+// representation are written to high48_out (which must have room for
+// ART_KEY_BYTES bytes) and the low 16 bits are returned.
+static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) {
+    const uint64_t big_endian = croaring_htobe64(key);
+    const uint8_t *leading_bytes = (const uint8_t *)&big_endian;
+    memcpy(high48_out, leading_bytes, ART_KEY_BYTES);
+    return (uint16_t)(key & 0xFFFF);
+}
+
+// Rebuilds a 64-bit key from its two halves: `high48` supplies the leading
+// ART_KEY_BYTES big-endian bytes and `low16` the low 16 bits.
+// Expects high48 to be of length ART_KEY_BYTES.
+static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) {
+    uint64_t big_endian = 0;
+    memcpy((uint8_t *)&big_endian, high48, ART_KEY_BYTES);
+    const uint64_t high_bits = croaring_be64toh(big_endian);
+    return high_bits | low16;
+}
+
+// Returns the smaller of the two unsigned 64-bit values.
+static inline uint64_t minimum(uint64_t a, uint64_t b) {
+    if (b <= a) {
+        return b;
+    }
+    return a;
+}
+
+// Allocates a leaf with roaring_malloc and stores the given container and
+// typecode in it. The allocation result is not checked.
+static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) {
+    leaf_t *new_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+    new_leaf->typecode = typecode;
+    new_leaf->container = container;
+    return new_leaf;
+}
+
+// Builds a new leaf holding a copy (copy_on_write disabled) of the container
+// in `leaf`.
+static inline leaf_t *copy_leaf_container(const leaf_t *leaf) {
+    // get_copy_of_container may rewrite the typecode it is given, so work on
+    // a local copy and store whatever it ends up with.
+    uint8_t copied_typecode = leaf->typecode;
+    container_t *copied_container = get_copy_of_container(
+        leaf->container, &copied_typecode, /*copy_on_write=*/false);
+
+    leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+    result_leaf->typecode = copied_typecode;
+    result_leaf->container = copied_container;
+    return result_leaf;
+}
+
+// Releases the leaf struct itself; the container it points to is not freed
+// here.
+static inline void free_leaf(leaf_t *leaf) {
+    roaring_free(leaf);
+}
+
+// Orders two high-48 keys by delegating to art_compare_keys.
+static inline int compare_high48(art_key_chunk_t key1[],
+                                 art_key_chunk_t key2[]) {
+    const int ordering = art_compare_keys(key1, key2);
+    return ordering;
+}
+
+// Positions `it` on the value that container_init_iterator reports for the
+// leaf art_it currently points to. Always returns true.
+static inline bool roaring64_iterator_init_at_leaf_first(
+    roaring64_iterator_t *it) {
+    leaf_t *current = (leaf_t *)it->art_it.value;
+    uint16_t low_bits = 0;
+
+    it->high48 = combine_key(it->art_it.key, 0);
+    it->container_it = container_init_iterator(current->container,
+                                               current->typecode, &low_bits);
+    it->value = it->high48 | low_bits;
+    it->has_value = true;
+    return true;
+}
+
+// Positions `it` on the value that container_init_iterator_last reports for
+// the leaf art_it currently points to. Always returns true.
+static inline bool roaring64_iterator_init_at_leaf_last(
+    roaring64_iterator_t *it) {
+    leaf_t *current = (leaf_t *)it->art_it.value;
+    uint16_t low_bits = 0;
+
+    it->high48 = combine_key(it->art_it.key, 0);
+    it->container_it = container_init_iterator_last(
+        current->container, current->typecode, &low_bits);
+    it->value = it->high48 | low_bits;
+    it->has_value = true;
+    return true;
+}
+
+// Initializes `it` over `r`, starting at the first leaf when `first` is true
+// and at the last leaf otherwise. For an empty bitmap the iterator has no
+// value and is marked saturated in the direction given by `first`.
+// Returns `it`.
+static inline roaring64_iterator_t *roaring64_iterator_init_at(
+    const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) {
+    it->parent = r;
+    it->art_it = art_init_iterator(&r->art, first);
+
+    if (it->art_it.value == NULL) {
+        it->has_value = false;
+        it->saturated_forward = first;
+        return it;
+    }
+
+    it->has_value = true;
+    if (first) {
+        roaring64_iterator_init_at_leaf_first(it);
+    } else {
+        roaring64_iterator_init_at_leaf_last(it);
+    }
+    return it;
+}
+
+// Allocates an empty 64-bit bitmap: the ART has no root and flags are zero.
+// The allocation result is not checked.
+roaring64_bitmap_t *roaring64_bitmap_create(void) {
+    roaring64_bitmap_t *bitmap =
+        (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t));
+    bitmap->flags = 0;
+    bitmap->art.root = NULL;
+    return bitmap;
+}
+
+// Frees every container and leaf reachable from the ART, then the ART itself
+// and finally the bitmap struct.
+void roaring64_bitmap_free(roaring64_bitmap_t *r) {
+    art_iterator_t cursor;
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        leaf_t *current = (leaf_t *)cursor.value;
+        container_free(current->container, current->typecode);
+        free_leaf(current);
+    }
+    art_free(&r->art);
+    roaring_free(r);
+}
+
+// Returns a new bitmap with a copy (copy_on_write disabled) of every
+// container in `r`, inserted under the same ART keys.
+roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) {
+    roaring64_bitmap_t *duplicate = roaring64_bitmap_create();
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        const leaf_t *source_leaf = (const leaf_t *)cursor.value;
+        // get_copy_of_container may rewrite the typecode it is given.
+        uint8_t copied_typecode = source_leaf->typecode;
+        container_t *copied_container = get_copy_of_container(
+            source_leaf->container, &copied_typecode, /*copy_on_write=*/false);
+        leaf_t *copied_leaf = create_leaf(copied_container, copied_typecode);
+        art_insert(&duplicate->art, cursor.key, (art_val_t *)copied_leaf);
+    }
+    return duplicate;
+}
+
+// Creates a bitmap containing min, min + step, min + 2 * step, ... for all
+// such values below max (max is exclusive).
+// Returns NULL when step is 0 or the range is empty (max <= min).
+roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max,
+ uint64_t step) {
+ if (step == 0 || max <= min) {
+ return NULL;
+ }
+ roaring64_bitmap_t *r = roaring64_bitmap_create();
+ if (step >= (1 << 16)) {
+ // Only one value per container.
+ uint64_t value; for(value = min; value < max; value += step) {
+ roaring64_bitmap_add(r, value);
+ // Stop before `value += step` would wrap around.
+ if (value > UINT64_MAX - step) {
+ break;
+ }
+ }
+ return r;
+ }
+ // step < 2^16: build one container per 16-bit window touched by the range.
+ do {
+ uint64_t high_bits = min & 0xFFFFFFFFFFFF0000;
+ uint16_t container_min = min & 0xFFFF;
+ // Exclusive upper bound inside this window, capped at 2^16.
+ uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16);
+
+ uint8_t typecode;
+ container_t *container = container_from_range(
+ &typecode, container_min, container_max, (uint16_t)step);
+
+ uint8_t high48[ART_KEY_BYTES];
+ split_key(min, high48);
+ leaf_t *leaf = create_leaf(container, typecode);
+ art_insert(&r->art, high48, (art_val_t *)leaf);
+
+ // Distance from min to the first multiple-of-step offset at or beyond
+ // container_max, i.e. the next value to emit.
+ uint64_t gap = container_max - container_min + step - 1;
+ uint64_t increment = gap - (gap % step);
+ // Stop before `min += increment` would wrap around.
+ if (min > UINT64_MAX - increment) {
+ break;
+ }
+ min += increment;
+ } while (min < max);
+ return r;
+}
+
+// Creates a bitmap holding the n_args values stored at `vals`.
+roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,
+                                            const uint64_t *vals) {
+    roaring64_bitmap_t *bitmap = roaring64_bitmap_create();
+    roaring64_bitmap_add_many(bitmap, n_args, vals);
+    return bitmap;
+}
+
+// Creates a bitmap from n_args variadic arguments, each of which is read as
+// a uint64_t.
+roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) {
+    roaring64_bitmap_t *bitmap = roaring64_bitmap_create();
+    roaring64_bulk_context_t bulk_context;
+    va_list args;
+    size_t remaining = n_args;
+
+    memset(&bulk_context, 0, sizeof(bulk_context));
+    va_start(args, n_args);
+    while (remaining > 0) {
+        uint64_t next_value = va_arg(args, uint64_t);
+        roaring64_bitmap_add_bulk(bitmap, &bulk_context, next_value);
+        remaining--;
+    }
+    va_end(args);
+    return bitmap;
+}
+
+// Adds low16 to the container of `leaf`. When `leaf` is NULL, a new array
+// container holding low16 is created and inserted into the ART under high48.
+// Returns the leaf that now holds the value.
+static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r,
+                                                        uint8_t *high48,
+                                                        uint16_t low16,
+                                                        leaf_t *leaf) {
+    uint8_t new_typecode;
+    container_t *updated;
+
+    if (leaf == NULL) {
+        array_container_t *fresh = array_container_create();
+        updated =
+            container_add(fresh, low16, ARRAY_CONTAINER_TYPE, &new_typecode);
+        assert(fresh == updated);
+        leaf = create_leaf(updated, new_typecode);
+        art_insert(&r->art, high48, (art_val_t *)leaf);
+        return leaf;
+    }
+
+    updated =
+        container_add(leaf->container, low16, leaf->typecode, &new_typecode);
+    if (updated != leaf->container) {
+        // container_add handed back a different container: drop the old one.
+        container_free(leaf->container, leaf->typecode);
+        leaf->container = updated;
+        leaf->typecode = new_typecode;
+    }
+    return leaf;
+}
+
+// Adds `val` to the bitmap.
+void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    leaf_t *existing = (leaf_t *)art_find(&r->art, key_bytes);
+    containerptr_roaring64_bitmap_add(r, key_bytes, low_bits, existing);
+}
+
+// Adds `val` to the bitmap and returns true if the cardinality of its
+// container changed, i.e. the value was not already present.
+bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    leaf_t *leaf = (leaf_t *)art_find(&r->art, key_bytes);
+
+    // A missing leaf counts as an empty container.
+    const int cardinality_before =
+        (leaf == NULL)
+            ? 0
+            : container_get_cardinality(leaf->container, leaf->typecode);
+
+    leaf = containerptr_roaring64_bitmap_add(r, key_bytes, low_bits, leaf);
+
+    const int cardinality_after =
+        container_get_cardinality(leaf->container, leaf->typecode);
+    return cardinality_after != cardinality_before;
+}
+
+// Adds `val` using `context` as a one-entry cache of the last leaf touched:
+// when the high 48 bits match the cached key, the ART lookup is skipped.
+void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r,
+                               roaring64_bulk_context_t *context,
+                               uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    const bool cache_hit =
+        context->leaf != NULL &&
+        compare_high48(context->high_bytes, key_bytes) == 0;
+
+    if (!cache_hit) {
+        // Not positioned anywhere yet, or positioned on a different key:
+        // look the leaf up (creating it if needed) and remember it.
+        leaf_t *found = (leaf_t *)art_find(&r->art, key_bytes);
+        context->leaf =
+            containerptr_roaring64_bitmap_add(r, key_bytes, low_bits, found);
+        memcpy(context->high_bytes, key_bytes, ART_KEY_BYTES);
+        return;
+    }
+
+    // The cached leaf has the right high bits; add directly to it.
+    leaf_t *cached = context->leaf;
+    uint8_t new_typecode;
+    container_t *updated = container_add(cached->container, low_bits,
+                                         cached->typecode, &new_typecode);
+    if (updated != cached->container) {
+        container_free(cached->container, cached->typecode);
+        cached->container = updated;
+        cached->typecode = new_typecode;
+    }
+}
+
+// Adds the n_args values stored at `vals`, sharing one bulk context across
+// all insertions. Does nothing when n_args is 0.
+void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args,
+                               const uint64_t *vals) {
+    roaring64_bulk_context_t bulk_context;
+    size_t idx;
+
+    memset(&bulk_context, 0, sizeof(bulk_context));
+    for (idx = 0; idx < n_args; idx++) {
+        roaring64_bitmap_add_bulk(r, &bulk_context, vals[idx]);
+    }
+}
+
+// Adds the closed range [min, max] of low-16 values to the container stored
+// under high48, creating the container if the key is not present yet.
+static inline void add_range_closed_at(art_t *art, uint8_t *high48,
+                                       uint16_t min, uint16_t max) {
+    leaf_t *leaf = (leaf_t *)art_find(art, high48);
+    uint8_t new_typecode;
+
+    if (leaf == NULL) {
+        // container_add_range is inclusive, but `container_range_of_ones` is
+        // exclusive, hence max + 1.
+        container_t *ones =
+            container_range_of_ones(min, max + 1, &new_typecode);
+        leaf = create_leaf(ones, new_typecode);
+        art_insert(art, high48, (art_val_t *)leaf);
+        return;
+    }
+
+    container_t *updated = container_add_range(leaf->container, leaf->typecode,
+                                               min, max, &new_typecode);
+    if (updated != leaf->container) {
+        container_free(leaf->container, leaf->typecode);
+        leaf->container = updated;
+        leaf->typecode = new_typecode;
+    }
+}
+
+// Adds the half-open range [min, max). An empty range (min >= max) is a
+// no-op.
+void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min,
+                                uint64_t max) {
+    if (max > min) {
+        roaring64_bitmap_add_range_closed(r, min, max - 1);
+    }
+}
+
+// Adds the closed range [min, max] to the bitmap. Does nothing when
+// min > max.
+void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max) {
+ if (min > max) {
+ return;
+ }
+
+ art_t *art = &r->art;
+ uint8_t min_high48[ART_KEY_BYTES];
+ uint16_t min_low16 = split_key(min, min_high48);
+ uint8_t max_high48[ART_KEY_BYTES];
+ uint16_t max_low16 = split_key(max, max_high48);
+ if (compare_high48(min_high48, max_high48) == 0) {
+ // Only populate range within one container.
+ add_range_closed_at(art, min_high48, min_low16, max_low16);
+ return;
+ }
+
+ // Populate a range across containers. Fill intermediate containers
+ // entirely.
+ // First container: from min_low16 up to the end of its 16-bit window.
+ add_range_closed_at(art, min_high48, min_low16, 0xffff);
+ uint64_t min_high_bits = min >> 16;
+ uint64_t max_high_bits = max >> 16;
+ // One full container per key strictly between the first and last keys.
+ uint64_t current; for(current = min_high_bits + 1; current < max_high_bits;
+ ++current) {
+ uint8_t current_high48[ART_KEY_BYTES];
+ split_key(current << 16, current_high48);
+ add_range_closed_at(art, current_high48, 0, 0xffff);
+ }
+ // Last container: from the start of its window up to max_low16.
+ add_range_closed_at(art, max_high48, 0, max_low16);
+}
+
+// Returns true if `val` is present in the bitmap.
+bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    const leaf_t *found = (const leaf_t *)art_find(&r->art, key_bytes);
+
+    if (found == NULL) {
+        return false;
+    }
+    return container_contains(found->container, low_bits, found->typecode);
+}
+
+// Returns true if every value in the half-open range [min, max) is present.
+// An empty range (min >= max) is reported as contained.
+bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max) {
+ if (min >= max) {
+ return true;
+ }
+
+ uint8_t min_high48[ART_KEY_BYTES];
+ uint16_t min_low16 = split_key(min, min_high48);
+ uint8_t max_high48[ART_KEY_BYTES];
+ uint16_t max_low16 = split_key(max, max_high48);
+ uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive
+
+ // The first leaf at or after min's key must be exactly min's key,
+ // otherwise the start of the range is missing.
+ art_iterator_t it = art_lower_bound(&r->art, min_high48);
+ if (it.value == NULL || combine_key(it.key, 0) > min) {
+ return false;
+ }
+ uint64_t prev_high48_bits = min & 0xFFFFFFFFFFFF0000;
+ while (it.value != NULL) {
+ uint64_t current_high48_bits = combine_key(it.key, 0);
+ if (current_high48_bits > max_high48_bits) {
+ // We've passed the end of the range with all containers containing
+ // the range.
+ return true;
+ }
+ if (current_high48_bits - prev_high48_bits > 0x10000) {
+ // There is a gap in the iterator that falls in the range.
+ return false;
+ }
+
+ leaf_t *leaf = (leaf_t *)it.value;
+ // Bounds to check inside this container; they default to the whole
+ // 16-bit window and are narrowed for the first and last containers.
+ uint32_t container_min = 0;
+ if (compare_high48(it.key, min_high48) == 0) {
+ container_min = min_low16;
+ }
+ uint32_t container_max = 0xFFFF + 1; // Exclusive
+ if (compare_high48(it.key, max_high48) == 0) {
+ container_max = max_low16;
+ }
+
+ // For the first and last containers we use container_contains_range,
+ // for the intermediate containers we can use container_is_full.
+ if (container_min == 0 && container_max == 0xFFFF + 1) {
+ if (!container_is_full(leaf->container, leaf->typecode)) {
+ return false;
+ }
+ } else if (!container_contains_range(leaf->container, container_min,
+ container_max, leaf->typecode)) {
+ return false;
+ }
+ prev_high48_bits = current_high48_bits;
+ art_iterator_next(&it);
+ }
+ // Ran out of leaves: the range is covered only if the last leaf visited
+ // was the one holding max - 1.
+ return prev_high48_bits == max_high48_bits;
+}
+
+// Returns true if `val` is present in the bitmap, using `context` as a
+// one-entry cache of the last leaf looked up. When the high 48 bits of `val`
+// match the cached key, the ART lookup is skipped.
+//
+// If no leaf exists for the key, false is returned and `context` is left
+// unchanged.
+bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r,
+                                    roaring64_bulk_context_t *context,
+                                    uint64_t val) {
+    uint8_t high48[ART_KEY_BYTES];
+    uint16_t low16 = split_key(val, high48);
+
+    // Compare the key bytes, not the array addresses: `high_bytes != high48`
+    // compares two distinct pointers and is always true, which would make
+    // the cache useless.
+    if (context->leaf == NULL ||
+        compare_high48(context->high_bytes, high48) != 0) {
+        // We're not positioned anywhere yet or the high bits of the key
+        // differ.
+        leaf_t *leaf = (leaf_t *)art_find(&r->art, high48);
+        if (leaf == NULL) {
+            return false;
+        }
+        context->leaf = leaf;
+        memcpy(context->high_bytes, high48, ART_KEY_BYTES);
+    }
+    return container_contains(context->leaf->container, low16,
+                              context->leaf->typecode);
+}
+
+// Finds the element at position `rank` (0-based, in ascending order) and
+// stores it in *element. Returns false when the bitmap has no element at
+// that position.
+bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank,
+                             uint64_t *element) {
+    // Number of elements in the containers already skipped.
+    uint64_t skipped = 0;
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        leaf_t *leaf = (leaf_t *)cursor.value;
+        uint64_t cardinality =
+            container_get_cardinality(leaf->container, leaf->typecode);
+
+        if (skipped + cardinality <= rank) {
+            // The requested position lies beyond this container.
+            skipped += cardinality;
+            continue;
+        }
+
+        uint32_t start_in_container = 0;
+        uint32_t rank_in_container = rank - skipped;
+        uint32_t low_bits = 0;
+        if (!container_select(leaf->container, leaf->typecode,
+                              &start_in_container, rank_in_container,
+                              &low_bits)) {
+            return false;
+        }
+        *element = combine_key(cursor.key, (uint16_t)low_bits);
+        return true;
+    }
+    return false;
+}
+
+// Sums the cardinalities of all containers whose key is below that of `val`
+// and adds container_rank(low 16 bits of val) for the container at val's key,
+// if there is one.
+uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    uint64_t total = 0;
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        leaf_t *leaf = (leaf_t *)cursor.value;
+        const int ordering = compare_high48(cursor.key, key_bytes);
+
+        if (ordering > 0) {
+            // Past val's key: nothing further can contribute.
+            break;
+        }
+        if (ordering == 0) {
+            return total +
+                   container_rank(leaf->container, leaf->typecode, low_bits);
+        }
+        total += container_get_cardinality(leaf->container, leaf->typecode);
+    }
+    return total;
+}
+
+// Looks up the position of `val` among the bitmap's elements. On success the
+// position is stored in *out_index and true is returned; false is returned
+// when `val` is not in the bitmap.
+bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val,
+                                uint64_t *out_index) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    // Elements in containers that precede val's container.
+    uint64_t preceding = 0;
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        leaf_t *leaf = (leaf_t *)cursor.value;
+        const int ordering = compare_high48(cursor.key, key_bytes);
+
+        if (ordering > 0) {
+            // Passed the key without finding it.
+            return false;
+        }
+        if (ordering == 0) {
+            const int index_in_container =
+                container_get_index(leaf->container, leaf->typecode, low_bits);
+            if (index_in_container < 0) {
+                return false;
+            }
+            *out_index = preceding + index_in_container;
+            return true;
+        }
+        preceding += container_get_cardinality(leaf->container, leaf->typecode);
+    }
+    return false;
+}
+
+// Removes low16 from the container of `leaf`. If the container ends up with
+// zero cardinality, the container is freed and the leaf is erased from the
+// ART and freed.
+// Returns the leaf if it still exists afterwards, NULL if `leaf` was NULL or
+// was erased.
+static inline leaf_t *containerptr_roaring64_bitmap_remove(
+ roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) {
+ if (leaf == NULL) {
+ return NULL;
+ }
+
+ container_t *container = leaf->container;
+ uint8_t typecode = leaf->typecode;
+ uint8_t typecode2;
+ container_t *container2 =
+ container_remove(container, low16, typecode, &typecode2);
+ // container_remove may hand back a different container; if so, the old
+ // one is released and the leaf is updated.
+ if (container2 != container) {
+ container_free(container, typecode);
+ leaf->container = container2;
+ leaf->typecode = typecode2;
+ }
+ if (!container_nonzero_cardinality(container2, typecode2)) {
+ container_free(container2, typecode2);
+ // art_erase returns the value stored under high48, i.e. the leaf.
+ leaf = (leaf_t *)art_erase(&r->art, high48);
+ if (leaf != NULL) {
+ free_leaf(leaf);
+ }
+ return NULL;
+ }
+ return leaf;
+}
+
+// Removes `val` from the bitmap, if present.
+void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    leaf_t *existing = (leaf_t *)art_find(&r->art, key_bytes);
+
+    containerptr_roaring64_bitmap_remove(r, key_bytes, low_bits, existing);
+}
+
+// Removes `val` from the bitmap and returns true if it was present.
+bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t key_bytes[ART_KEY_BYTES];
+    const uint16_t low_bits = split_key(val, key_bytes);
+    leaf_t *leaf = (leaf_t *)art_find(&r->art, key_bytes);
+    int cardinality_before;
+
+    if (leaf == NULL) {
+        return false;
+    }
+    cardinality_before =
+        container_get_cardinality(leaf->container, leaf->typecode);
+
+    leaf = containerptr_roaring64_bitmap_remove(r, key_bytes, low_bits, leaf);
+
+    // A NULL leaf means the container became empty and was erased, so the
+    // value must have been there. Otherwise compare cardinalities.
+    return leaf == NULL ||
+           container_get_cardinality(leaf->container, leaf->typecode) !=
+               cardinality_before;
+}
+
+// Removes `val` using `context` as a one-entry cache of the last leaf
+// touched: when the high 48 bits match the cached key, the ART lookup is
+// skipped.
+//
+// If the removal empties the container, the container and its leaf are freed
+// and the cached leaf pointer is cleared so that later calls do not
+// dereference freed memory.
+void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r,
+                                  roaring64_bulk_context_t *context,
+                                  uint64_t val) {
+    art_t *art = &r->art;
+    uint8_t high48[ART_KEY_BYTES];
+    uint16_t low16 = split_key(val, high48);
+    if (context->leaf != NULL &&
+        compare_high48(context->high_bytes, high48) == 0) {
+        // We're at a container with the correct high bits.
+        uint8_t typecode2;
+        container_t *container2 =
+            container_remove(context->leaf->container, low16,
+                             context->leaf->typecode, &typecode2);
+        if (container2 != context->leaf->container) {
+            container_free(context->leaf->container, context->leaf->typecode);
+            context->leaf->container = container2;
+            context->leaf->typecode = typecode2;
+        }
+        if (!container_nonzero_cardinality(container2, typecode2)) {
+            leaf_t *leaf = (leaf_t *)art_erase(art, high48);
+            container_free(container2, typecode2);
+            if (leaf != NULL) {
+                free_leaf(leaf);
+            }
+            // The cached leaf has just been freed; forget it so the next
+            // call with the same high bits performs a fresh lookup instead
+            // of using a dangling pointer.
+            context->leaf = NULL;
+        }
+    } else {
+        // We're not positioned anywhere yet or the high bits of the key
+        // differ.
+        leaf_t *leaf = (leaf_t *)art_find(art, high48);
+        context->leaf =
+            containerptr_roaring64_bitmap_remove(r, high48, low16, leaf);
+        memcpy(context->high_bytes, high48, ART_KEY_BYTES);
+    }
+}
+
+// Removes the n_args values stored at `vals`, sharing one bulk context
+// across all removals. Does nothing when n_args is 0.
+void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args,
+                                  const uint64_t *vals) {
+    roaring64_bulk_context_t bulk_context;
+    size_t idx;
+
+    memset(&bulk_context, 0, sizeof(bulk_context));
+    for (idx = 0; idx < n_args; idx++) {
+        roaring64_bitmap_remove_bulk(r, &bulk_context, vals[idx]);
+    }
+}
+
+// Removes the closed range [min, max] of low-16 values from the container
+// stored under high48. Does nothing if the key is not present.
+static inline void remove_range_closed_at(art_t *art, uint8_t *high48,
+ uint16_t min, uint16_t max) {
+ leaf_t *leaf = (leaf_t *)art_find(art, high48);
+ if (leaf == NULL) {
+ return;
+ }
+ uint8_t typecode2;
+ container_t *container2 = container_remove_range(
+ leaf->container, leaf->typecode, min, max, &typecode2);
+ if (container2 != leaf->container) {
+ // A different result was returned: the old container is released.
+ container_free(leaf->container, leaf->typecode);
+ if (container2 != NULL) {
+ leaf->container = container2;
+ leaf->typecode = typecode2;
+ } else {
+ // NOTE(review): a NULL result is treated as "nothing left in this
+ // container" - confirm against container_remove_range's contract.
+ art_erase(art, high48);
+ free_leaf(leaf);
+ }
+ }
+}
+
+// Removes the half-open range [min, max). An empty range (min >= max) is a
+// no-op.
+void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min,
+                                   uint64_t max) {
+    if (max > min) {
+        roaring64_bitmap_remove_range_closed(r, min, max - 1);
+    }
+}
+
+// Removes the closed range [min, max] from the bitmap. Does nothing when
+// min > max.
+void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min,
+ uint64_t max) {
+ if (min > max) {
+ return;
+ }
+
+ art_t *art = &r->art;
+ uint8_t min_high48[ART_KEY_BYTES];
+ uint16_t min_low16 = split_key(min, min_high48);
+ uint8_t max_high48[ART_KEY_BYTES];
+ uint16_t max_low16 = split_key(max, max_high48);
+ if (compare_high48(min_high48, max_high48) == 0) {
+ // Only remove a range within one container.
+ remove_range_closed_at(art, min_high48, min_low16, max_low16);
+ return;
+ }
+
+ // Remove a range across containers. Remove intermediate containers
+ // entirely.
+ // First container: from min_low16 up to the end of its 16-bit window.
+ remove_range_closed_at(art, min_high48, min_low16, 0xffff);
+
+ // Every leaf with a key strictly between the first and last keys is
+ // erased outright. There is no explicit "next" call in the loop: the
+ // iterator is only ever moved by art_iterator_erase.
+ art_iterator_t it = art_upper_bound(art, min_high48);
+ while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) {
+ leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it);
+ container_free(leaf->container, leaf->typecode);
+ free_leaf(leaf);
+ }
+ // Last container: from the start of its window up to max_low16.
+ remove_range_closed_at(art, max_high48, 0, max_low16);
+}
+
+// Returns the number of elements in the bitmap, computed by summing the
+// cardinality of every container.
+uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) {
+    uint64_t total = 0;
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        const leaf_t *leaf = (const leaf_t *)cursor.value;
+        total += container_get_cardinality(leaf->container, leaf->typecode);
+    }
+    return total;
+}
+
+// Returns the number of elements in the half-open range [min, max).
+// Returns 0 for an empty range (min >= max).
+uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r,
+ uint64_t min, uint64_t max) {
+ if (min >= max) {
+ return 0;
+ }
+ max--; // A closed range is easier to work with.
+
+ uint64_t cardinality = 0;
+ uint8_t min_high48[ART_KEY_BYTES];
+ uint16_t min_low16 = split_key(min, min_high48);
+ uint8_t max_high48[ART_KEY_BYTES];
+ uint16_t max_low16 = split_key(max, max_high48);
+
+ // Start at the first leaf whose key is >= min's key.
+ art_iterator_t it = art_lower_bound(&r->art, min_high48);
+ while (it.value != NULL) {
+ int max_compare_result = compare_high48(it.key, max_high48);
+ if (max_compare_result > 0) {
+ // We're outside the range.
+ break;
+ }
+
+ leaf_t *leaf = (leaf_t *)it.value;
+ if (max_compare_result == 0) {
+ // We're at the max high key, add only the range up to the low
+ // 16 bits of max.
+ cardinality +=
+ container_rank(leaf->container, leaf->typecode, max_low16);
+ } else {
+ // We're not yet at the max high key, add the full container
+ // range.
+ cardinality +=
+ container_get_cardinality(leaf->container, leaf->typecode);
+ }
+ if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) {
+ // We're at the min high key, remove the range up to the low 16
+ // bits of min.
+ // The rank is taken at min_low16 - 1 so that min itself stays
+ // counted; min_low16 == 0 needs no correction.
+ cardinality -=
+ container_rank(leaf->container, leaf->typecode, min_low16 - 1);
+ }
+ art_iterator_next(&it);
+ }
+ return cardinality;
+}
+
+// Returns true if the bitmap's ART is empty.
+bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) {
+    const art_t *tree = &r->art;
+    return art_is_empty(tree);
+}
+
+// Returns the smallest element of the bitmap, or UINT64_MAX if the bitmap is
+// empty.
+uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) {
+    art_iterator_t first_entry = art_init_iterator(&r->art, /*first=*/true);
+    const leaf_t *leaf = (const leaf_t *)first_entry.value;
+
+    if (leaf == NULL) {
+        return UINT64_MAX;
+    }
+    const uint16_t low_bits =
+        container_minimum(leaf->container, leaf->typecode);
+    return combine_key(first_entry.key, low_bits);
+}
+
+// Returns the largest element of the bitmap, or 0 if the bitmap is empty.
+uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) {
+    art_iterator_t last_entry = art_init_iterator(&r->art, /*first=*/false);
+    const leaf_t *leaf = (const leaf_t *)last_entry.value;
+
+    if (leaf == NULL) {
+        return 0;
+    }
+    const uint16_t low_bits =
+        container_maximum(leaf->container, leaf->typecode);
+    return combine_key(last_entry.key, low_bits);
+}
+
+// Runs convert_run_optimize on every container. Returns true if at least one
+// container is a run container afterwards.
+bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) {
+    bool any_run_container = false;
+    art_iterator_t cursor;
+
+    for (cursor = art_init_iterator(&r->art, /*first=*/true);
+         cursor.value != NULL; art_iterator_next(&cursor)) {
+        leaf_t *leaf = (leaf_t *)cursor.value;
+        uint8_t optimized_typecode;
+        // We don't need to free the existing container if a new one was
+        // created, convert_run_optimize does that internally.
+        leaf->container = convert_run_optimize(leaf->container, leaf->typecode,
+                                               &optimized_typecode);
+        leaf->typecode = optimized_typecode;
+        if (optimized_typecode == RUN_CONTAINER_TYPE) {
+            any_run_container = true;
+        }
+    }
+    return any_run_container;
+}
+
+// Leaf validator handed to art_internal_validate: interprets `val` as a leaf
+// and validates its container.
+static bool roaring64_leaf_internal_validate(const art_val_t *val,
+                                             const char **reason) {
+    const leaf_t *leaf = (const leaf_t *)val;
+    const bool container_ok =
+        container_internal_validate(leaf->container, leaf->typecode, reason);
+    return container_ok;
+}
+
+// Validates the bitmap by running art_internal_validate over its ART with
+// roaring64_leaf_internal_validate as the leaf validator. `reason` is
+// forwarded to both.
+bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r,
+                                        const char **reason) {
+    const art_t *tree = &r->art;
+    return art_internal_validate(tree, reason,
+                                 roaring64_leaf_internal_validate);
+}
+
+// Returns true if both bitmaps contain exactly the same elements.
+bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1,
+                             const roaring64_bitmap_t *r2) {
+    art_iterator_t left = art_init_iterator(&r1->art, /*first=*/true);
+    art_iterator_t right = art_init_iterator(&r2->art, /*first=*/true);
+
+    for (;;) {
+        const bool left_done = left.value == NULL;
+        const bool right_done = right.value == NULL;
+        if (left_done || right_done) {
+            // Equal only if both sides ran out at the same time.
+            return left_done && right_done;
+        }
+        if (compare_high48(left.key, right.key) != 0) {
+            return false;
+        }
+
+        const leaf_t *left_leaf = (const leaf_t *)left.value;
+        const leaf_t *right_leaf = (const leaf_t *)right.value;
+        if (!container_equals(left_leaf->container, left_leaf->typecode,
+                              right_leaf->container, right_leaf->typecode)) {
+            return false;
+        }
+        art_iterator_next(&left);
+        art_iterator_next(&right);
+    }
+}
+
+// Returns true if every element of r1 is also in r2.
+bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2) {
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL) {
+ bool it2_present = it2.value != NULL;
+
+ int compare_result = 0;
+ if (it2_present) {
+ compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Same key on both sides: compare the containers, then advance
+ // both iterators.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ if (!container_is_subset(leaf1->container, leaf1->typecode,
+ leaf2->container, leaf2->typecode)) {
+ return false;
+ }
+ art_iterator_next(&it1);
+ art_iterator_next(&it2);
+ }
+ }
+ if (!it2_present || compare_result < 0) {
+ // r1 has a key that r2 lacks, so r1 cannot be a subset.
+ return false;
+ } else if (compare_result > 0) {
+ // it2 is behind it1: skip it2 ahead to it1's key.
+ art_iterator_lower_bound(&it2, it1.key);
+ }
+ }
+ return true;
+}
+
+// Returns true if r1 is a subset of r2 and has strictly fewer elements.
+bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1,
+                                       const roaring64_bitmap_t *r2) {
+    // The cardinality test comes first, so the subset scan only runs when r1
+    // is smaller than r2.
+    if (roaring64_bitmap_get_cardinality(r1) >=
+        roaring64_bitmap_get_cardinality(r2)) {
+        return false;
+    }
+    return roaring64_bitmap_is_subset(r1, r2);
+}
+
+// Returns a newly allocated bitmap holding the intersection of r1 and r2.
+roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2) {
+ roaring64_bitmap_t *result = roaring64_bitmap_create();
+
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL && it2.value != NULL) {
+ // Cases:
+ // 1. it1 < it2 -> move it1 forward to it2's key
+ // 2. it1 == it2 -> output it1 & it2, it1++, it2++
+ // 3. it1 > it2 -> move it2 forward to it1's key
+ int compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 2: iterators at the same high key position.
+ leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ result_leaf->container = container_and(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &result_leaf->typecode);
+
+ // Only keep the leaf if the intersection is non-empty.
+ if (container_nonzero_cardinality(result_leaf->container,
+ result_leaf->typecode)) {
+ art_insert(&result->art, it1.key, (art_val_t *)result_leaf);
+ } else {
+ container_free(result_leaf->container, result_leaf->typecode);
+ free_leaf(result_leaf);
+ }
+ art_iterator_next(&it1);
+ art_iterator_next(&it2);
+ } else if (compare_result < 0) {
+ // Case 1: it1 is before it2.
+ art_iterator_lower_bound(&it1, it2.key);
+ } else {
+ // Case 3: it2 is before it1.
+ art_iterator_lower_bound(&it2, it1.key);
+ }
+ }
+ return result;
+}
+
+// Returns the number of elements common to r1 and r2 without building the
+// intersection bitmap.
+uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2) {
+ uint64_t result = 0;
+
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL && it2.value != NULL) {
+ // Cases:
+ // 1. it1 < it2 -> move it1 forward to it2's key
+ // 2. it1 == it2 -> add cardinality of it1 & it2, it1++, it2++
+ // 3. it1 > it2 -> move it2 forward to it1's key
+ int compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 2: iterators at the same high key position.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ result +=
+ container_and_cardinality(leaf1->container, leaf1->typecode,
+ leaf2->container, leaf2->typecode);
+ art_iterator_next(&it1);
+ art_iterator_next(&it2);
+ } else if (compare_result < 0) {
+ // Case 1: it1 is before it2.
+ art_iterator_lower_bound(&it1, it2.key);
+ } else {
+ // Case 3: it2 is before it1.
+ art_iterator_lower_bound(&it2, it1.key);
+ }
+ }
+ return result;
+}
+
+// Inplace and (modifies its first argument).
+// After the call r1 holds the intersection of r1 and r2. Passing the same
+// bitmap for both arguments is a no-op.
+void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1,
+ const roaring64_bitmap_t *r2) {
+ if (r1 == r2) {
+ return;
+ }
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL) {
+ // Cases:
+ // 1. !it2_present -> erase it1
+ // 2. it2_present
+ // a. it1 < it2 -> erase it1
+ // b. it1 == it2 -> output it1 & it2, it1++, it2++
+ // c. it1 > it2 -> move it2 forward to it1's key
+ bool it2_present = it2.value != NULL;
+ int compare_result = 0;
+ if (it2_present) {
+ compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 2b: iterators at the same high key position.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+
+ // We do the computation "in place" only when c1 is not a
+ // shared container. Rationale: using a shared container
+ // safely with in place computation would require making a
+ // copy and then doing the computation in place which is
+ // likely less efficient than avoiding in place entirely and
+ // always generating a new container.
+ uint8_t typecode2;
+ container_t *container2;
+ if (leaf1->typecode == SHARED_CONTAINER_TYPE) {
+ container2 = container_and(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ } else {
+ container2 = container_iand(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ }
+
+ if (container2 != leaf1->container) {
+ container_free(leaf1->container, leaf1->typecode);
+ leaf1->container = container2;
+ leaf1->typecode = typecode2;
+ }
+ if (!container_nonzero_cardinality(container2, typecode2)) {
+ container_free(container2, typecode2);
+ art_iterator_erase(&r1->art, &it1);
+ free_leaf(leaf1);
+ } else {
+ // Only advance the iterator if we didn't delete the
+ // leaf, as erasing advances by itself.
+ art_iterator_next(&it1);
+ }
+ art_iterator_next(&it2);
+ }
+ }
+
+ // compare_result is still 0 here after case 2b, so neither branch below
+ // runs in that case.
+ if (!it2_present || compare_result < 0) {
+ // Cases 1 and 2a: it1 is the only iterator or is before it2.
+ leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1);
+ assert(leaf != NULL);
+ container_free(leaf->container, leaf->typecode);
+ free_leaf(leaf);
+ } else if (compare_result > 0) {
+ // Case 2c: it1 is after it2.
+ art_iterator_lower_bound(&it2, it1.key);
+ }
+ }
+}
+
+// Returns true if r1 and r2 have at least one element in common.
+//
+// Both ARTs are walked in key order; the scan stops at the first pair of
+// containers that intersect, since later containers cannot change the
+// answer.
+bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1,
+                                const roaring64_bitmap_t *r2) {
+    art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+    art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+    while (it1.value != NULL && it2.value != NULL) {
+        // Cases:
+        // 1. it1 <  it2 -> move it1 forward to it2's key
+        // 2. it1 == it2 -> test it1 & it2, it1++, it2++
+        // 3. it1 >  it2 -> move it2 forward to it1's key
+        int compare_result = compare_high48(it1.key, it2.key);
+        if (compare_result == 0) {
+            // Case 2: iterators at the same high key position.
+            leaf_t *leaf1 = (leaf_t *)it1.value;
+            leaf_t *leaf2 = (leaf_t *)it2.value;
+            if (container_intersect(leaf1->container, leaf1->typecode,
+                                    leaf2->container, leaf2->typecode)) {
+                // One common element is enough; no need to keep scanning.
+                return true;
+            }
+            art_iterator_next(&it1);
+            art_iterator_next(&it2);
+        } else if (compare_result < 0) {
+            // Case 1: it1 is before it2.
+            art_iterator_lower_bound(&it1, it2.key);
+        } else {
+            // Case 3: it2 is before it1.
+            art_iterator_lower_bound(&it2, it1.key);
+        }
+    }
+    return false;
+}
+
+/**
+ * Returns true if `r` contains at least one value in the half-open range
+ * [min, max). An empty range (min >= max) never intersects.
+ */
+bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r,
+                                           uint64_t min, uint64_t max) {
+    bool in_range = false;
+    if (min < max) {
+        roaring64_iterator_t cursor;
+        roaring64_iterator_init_at(r, &cursor, /*first=*/true);
+        // Position on the first value >= min, then check it is below max.
+        if (roaring64_iterator_move_equalorlarger(&cursor, min) &&
+            roaring64_iterator_has_value(&cursor)) {
+            in_range = roaring64_iterator_value(&cursor) < max;
+        }
+    }
+    return in_range;
+}
+
+/**
+ * Computes |r1 AND r2| / |r1 OR r2|, with the union size derived as
+ * |r1| + |r2| - |r1 AND r2|.
+ *
+ * NOTE(review): when the union is empty the division is 0.0 / 0.0; callers
+ * presumably must not rely on the result for two empty bitmaps -- confirm.
+ */
+double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1,
+                                      const roaring64_bitmap_t *r2) {
+    const uint64_t card1 = roaring64_bitmap_get_cardinality(r1);
+    const uint64_t card2 = roaring64_bitmap_get_cardinality(r2);
+    const uint64_t shared = roaring64_bitmap_and_cardinality(r1, r2);
+    const uint64_t union_card = card1 + card2 - shared;
+    return (double)shared / (double)union_card;
+}
+
+/**
+ * Returns a newly created bitmap holding the union of `r1` and `r2`.
+ *
+ * The two ARTs are merged key by key: a key present in only one input is
+ * copied, a key present in both gets the OR of the two containers.
+ */
+roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1,
+                                        const roaring64_bitmap_t *r2) {
+    roaring64_bitmap_t *result = roaring64_bitmap_create();
+
+    art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+    art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+    for (;;) {
+        const bool have1 = it1.value != NULL;
+        const bool have2 = it2.value != NULL;
+        if (!have1 && !have2) {
+            break;
+        }
+
+        // order < 0: emit it1's leaf; order > 0: emit it2's leaf;
+        // order == 0: both inputs have this key.
+        int order;
+        if (!have2) {
+            order = -1;
+        } else if (!have1) {
+            order = 1;
+        } else {
+            order = compare_high48(it1.key, it2.key);
+        }
+
+        if (order < 0) {
+            // it1 is the only iterator or is before it2.
+            leaf_t *copied = copy_leaf_container((leaf_t *)it1.value);
+            art_insert(&result->art, it1.key, (art_val_t *)copied);
+            art_iterator_next(&it1);
+        } else if (order > 0) {
+            // it2 is the only iterator or is before it1.
+            leaf_t *copied = copy_leaf_container((leaf_t *)it2.value);
+            art_insert(&result->art, it2.key, (art_val_t *)copied);
+            art_iterator_next(&it2);
+        } else {
+            // Same high key on both sides: union the containers.
+            leaf_t *lhs = (leaf_t *)it1.value;
+            leaf_t *rhs = (leaf_t *)it2.value;
+            leaf_t *merged = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+            merged->container =
+                container_or(lhs->container, lhs->typecode, rhs->container,
+                             rhs->typecode, &merged->typecode);
+            art_insert(&result->art, it1.key, (art_val_t *)merged);
+            art_iterator_next(&it1);
+            art_iterator_next(&it2);
+        }
+    }
+    return result;
+}
+
+/**
+ * Returns |r1 OR r2| without building the union, computed as
+ * |r1| + |r2| - |r1 AND r2|.
+ */
+uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1,
+                                         const roaring64_bitmap_t *r2) {
+    const uint64_t card1 = roaring64_bitmap_get_cardinality(r1);
+    const uint64_t card2 = roaring64_bitmap_get_cardinality(r2);
+    const uint64_t shared = roaring64_bitmap_and_cardinality(r1, r2);
+    const uint64_t total = card1 + card2;
+    return total - shared;
+}
+
+void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1,  // In-place union: r1 becomes r1 | r2.
+ const roaring64_bitmap_t *r2) {
+ if (r1 == r2) {  // Union with itself changes nothing.
+ return;
+ }
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL || it2.value != NULL) {
+ bool it1_present = it1.value != NULL;
+ bool it2_present = it2.value != NULL;
+
+ // Cases:
+ // 1. it1_present && !it2_present -> it1++
+ // 2. !it1_present && it2_present -> add it2, it2++
+ // 3. it1_present && it2_present
+ // a. it1 < it2 -> it1++
+ // b. it1 == it2 -> it1 | it2, it1++, it2++
+ // c. it1 > it2 -> add it2, it2++
+ int compare_result = 0;  // Stays 0 when only one iterator is live, so the branches below key off the *_present flags.
+ if (it1_present && it2_present) {
+ compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 3b: iterators at the same high key position.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ uint8_t typecode2;
+ container_t *container2;
+ if (leaf1->typecode == SHARED_CONTAINER_TYPE) {  // Shared container: use the non-in-place OR instead of container_ior.
+ container2 = container_or(leaf1->container, leaf1->typecode,
+ leaf2->container, leaf2->typecode,
+ &typecode2);
+ } else {
+ container2 = container_ior(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ }
+ if (container2 != leaf1->container) {  // A different container came back: release the old one and adopt the result.
+ container_free(leaf1->container, leaf1->typecode);
+ leaf1->container = container2;
+ leaf1->typecode = typecode2;
+ }
+ art_iterator_next(&it1);
+ art_iterator_next(&it2);
+ }
+ }
+ if ((it1_present && !it2_present) || compare_result < 0) {  // Not taken after case 3b because compare_result is 0 there.
+ // Cases 1 and 3a: it1 is the only iterator or is before it2.
+ art_iterator_next(&it1);
+ } else if ((!it1_present && it2_present) || compare_result > 0) {
+ // Cases 2 and 3c: it2 is the only iterator or is before it1.
+ leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value);
+ art_iterator_insert(&r1->art, &it1, it2.key,  // NOTE(review): also reached with it1 exhausted; roaring64_bitmap_xor_inplace uses art_insert in that case -- confirm art_iterator_insert accepts an exhausted iterator.
+ (art_val_t *)result_leaf);
+ art_iterator_next(&it2);
+ }
+ }
+}
+
+/**
+ * Returns a newly created bitmap holding the symmetric difference of `r1`
+ * and `r2`.
+ *
+ * Keys present in only one input are copied. Keys present in both get the
+ * XOR of the two containers, which is dropped when it comes out empty.
+ */
+roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1,
+                                         const roaring64_bitmap_t *r2) {
+    roaring64_bitmap_t *result = roaring64_bitmap_create();
+
+    art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+    art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+    for (;;) {
+        const bool have1 = it1.value != NULL;
+        const bool have2 = it2.value != NULL;
+        if (!have1 && !have2) {
+            break;
+        }
+
+        // order < 0: emit it1's leaf; order > 0: emit it2's leaf;
+        // order == 0: both inputs have this key.
+        int order;
+        if (!have2) {
+            order = -1;
+        } else if (!have1) {
+            order = 1;
+        } else {
+            order = compare_high48(it1.key, it2.key);
+        }
+
+        if (order < 0) {
+            // it1 is the only iterator or is before it2.
+            leaf_t *copied = copy_leaf_container((leaf_t *)it1.value);
+            art_insert(&result->art, it1.key, (art_val_t *)copied);
+            art_iterator_next(&it1);
+        } else if (order > 0) {
+            // it2 is the only iterator or is before it1.
+            leaf_t *copied = copy_leaf_container((leaf_t *)it2.value);
+            art_insert(&result->art, it2.key, (art_val_t *)copied);
+            art_iterator_next(&it2);
+        } else {
+            // Same high key on both sides: XOR the containers.
+            leaf_t *lhs = (leaf_t *)it1.value;
+            leaf_t *rhs = (leaf_t *)it2.value;
+            leaf_t *mixed = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+            mixed->container =
+                container_xor(lhs->container, lhs->typecode, rhs->container,
+                              rhs->typecode, &mixed->typecode);
+            if (!container_nonzero_cardinality(mixed->container,
+                                               mixed->typecode)) {
+                // The containers cancelled out; nothing to store.
+                container_free(mixed->container, mixed->typecode);
+                free_leaf(mixed);
+            } else {
+                art_insert(&result->art, it1.key, (art_val_t *)mixed);
+            }
+            art_iterator_next(&it1);
+            art_iterator_next(&it2);
+        }
+    }
+    return result;
+}
+
+/**
+ * Returns |r1 XOR r2| without building the result, computed as
+ * |r1| + |r2| - 2 * |r1 AND r2|.
+ */
+uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1,
+                                          const roaring64_bitmap_t *r2) {
+    const uint64_t card1 = roaring64_bitmap_get_cardinality(r1);
+    const uint64_t card2 = roaring64_bitmap_get_cardinality(r2);
+    const uint64_t shared = roaring64_bitmap_and_cardinality(r1, r2);
+    const uint64_t total = card1 + card2;
+    return total - 2 * shared;
+}
+
+void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1,  // In-place symmetric difference: r1 becomes r1 ^ r2.
+ const roaring64_bitmap_t *r2) {
+ assert(r1 != r2);  // Self-XOR is not supported here; only checked when asserts are enabled.
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL || it2.value != NULL) {
+ bool it1_present = it1.value != NULL;
+ bool it2_present = it2.value != NULL;
+
+ // Cases:
+ // 1. it1_present && !it2_present -> it1++
+ // 2. !it1_present && it2_present -> add it2, it2++
+ // 3. it1_present && it2_present
+ // a. it1 < it2 -> it1++
+ // b. it1 == it2 -> it1 ^ it2, it1++, it2++
+ // c. it1 > it2 -> add it2, it2++
+ int compare_result = 0;  // Stays 0 when only one iterator is live, so the branches below key off the *_present flags.
+ if (it1_present && it2_present) {
+ compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 3b: iterators at the same high key position.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ container_t *container1 = leaf1->container;
+ uint8_t typecode1 = leaf1->typecode;
+ uint8_t typecode2;
+ container_t *container2;
+ if (leaf1->typecode == SHARED_CONTAINER_TYPE) {  // Shared container: use the non-in-place XOR instead of container_ixor.
+ container2 = container_xor(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ if (container2 != container1) {
+ // We only free when doing container_xor, not
+ // container_ixor, as ixor frees the original
+ // internally.
+ container_free(container1, typecode1);
+ }
+ } else {
+ container2 = container_ixor(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ }
+ leaf1->container = container2;  // The leaf always adopts the result, whichever path produced it.
+ leaf1->typecode = typecode2;
+
+ if (!container_nonzero_cardinality(container2, typecode2)) {  // Result is empty: drop the container and the leaf from r1.
+ container_free(container2, typecode2);
+ art_iterator_erase(&r1->art, &it1);
+ free_leaf(leaf1);
+ } else {
+ // Only advance the iterator if we didn't delete the
+ // leaf, as erasing advances by itself.
+ art_iterator_next(&it1);
+ }
+ art_iterator_next(&it2);
+ }
+ }
+ if ((it1_present && !it2_present) || compare_result < 0) {  // Not taken after case 3b because compare_result is 0 there.
+ // Cases 1 and 3a: it1 is the only iterator or is before it2.
+ art_iterator_next(&it1);
+ } else if ((!it1_present && it2_present) || compare_result > 0) {
+ // Cases 2 and 3c: it2 is the only iterator or is before it1.
+ leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value);
+ if (it1_present) {
+ art_iterator_insert(&r1->art, &it1, it2.key,
+ (art_val_t *)result_leaf);
+ art_iterator_next(&it1);  // Step past the leaf just inserted (presumably it1 is left on it -- confirm) so it is not visited again.
+ } else {
+ art_insert(&r1->art, it2.key, (art_val_t *)result_leaf);  // it1 is exhausted: insert through the tree rather than the iterator.
+ }
+ art_iterator_next(&it2);
+ }
+ }
+}
+
+/**
+ * Returns a newly created bitmap holding the values of `r1` that are not in
+ * `r2`.
+ *
+ * Only keys of `r1` can appear in the result: a key missing from `r2` is
+ * copied, a key present in both gets container1 AND NOT container2, and is
+ * dropped when that comes out empty.
+ */
+roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1,
+                                            const roaring64_bitmap_t *r2) {
+    roaring64_bitmap_t *result = roaring64_bitmap_create();
+
+    art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+    art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+    while (it1.value != NULL) {
+        // order < 0: it2 is exhausted or it1 is before it2;
+        // order > 0: it1 is after it2; order == 0: same key.
+        int order = -1;
+        if (it2.value != NULL) {
+            order = compare_high48(it1.key, it2.key);
+        }
+
+        if (order < 0) {
+            // Nothing to subtract at this key: keep it1's leaf as is.
+            leaf_t *copied = copy_leaf_container((leaf_t *)it1.value);
+            art_insert(&result->art, it1.key, (art_val_t *)copied);
+            art_iterator_next(&it1);
+        } else if (order > 0) {
+            // it2's key does not exist in r1: skip it.
+            art_iterator_next(&it2);
+        } else {
+            // Same high key on both sides: subtract the containers.
+            leaf_t *diff = (leaf_t *)roaring_malloc(sizeof(leaf_t));
+            leaf_t *lhs = (leaf_t *)it1.value;
+            leaf_t *rhs = (leaf_t *)it2.value;
+            diff->container = container_andnot(
+                lhs->container, lhs->typecode, rhs->container, rhs->typecode,
+                &diff->typecode);
+
+            if (!container_nonzero_cardinality(diff->container,
+                                               diff->typecode)) {
+                // Everything was removed; nothing to store.
+                container_free(diff->container, diff->typecode);
+                free_leaf(diff);
+            } else {
+                art_insert(&result->art, it1.key, (art_val_t *)diff);
+            }
+            art_iterator_next(&it1);
+            art_iterator_next(&it2);
+        }
+    }
+    return result;
+}
+
+/**
+ * Returns |r1 AND NOT r2| without building the result, computed as
+ * |r1| - |r1 AND r2|.
+ */
+uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1,
+                                             const roaring64_bitmap_t *r2) {
+    const uint64_t card1 = roaring64_bitmap_get_cardinality(r1);
+    const uint64_t shared = roaring64_bitmap_and_cardinality(r1, r2);
+    const uint64_t remaining = card1 - shared;
+    return remaining;
+}
+
+void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1,  // In-place difference: r1 becomes r1 & ~r2. NOTE(review): no r1 == r2 guard here, unlike or_inplace (early return) and xor_inplace (assert) -- confirm self-subtraction is safe.
+ const roaring64_bitmap_t *r2) {
+ art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true);
+ art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true);
+
+ while (it1.value != NULL) {
+ // Cases:
+ // 1. it1_present && !it2_present -> it1++
+ // 2. it1_present && it2_present
+ // a. it1 < it2 -> it1++
+ // b. it1 == it2 -> it1 - it2, it1++, it2++
+ // c. it1 > it2 -> it2++
+ bool it2_present = it2.value != NULL;
+ int compare_result = 0;  // Stays 0 when it2 is exhausted; the branch below then keys off it2_present.
+ if (it2_present) {
+ compare_result = compare_high48(it1.key, it2.key);
+ if (compare_result == 0) {
+ // Case 2b: iterators at the same high key position.
+ leaf_t *leaf1 = (leaf_t *)it1.value;
+ leaf_t *leaf2 = (leaf_t *)it2.value;
+ container_t *container1 = leaf1->container;
+ uint8_t typecode1 = leaf1->typecode;
+ uint8_t typecode2;
+ container_t *container2;
+ if (leaf1->typecode == SHARED_CONTAINER_TYPE) {  // Shared container: use the non-in-place ANDNOT instead of container_iandnot.
+ container2 = container_andnot(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ if (container2 != container1) {
+ // We only free when doing container_andnot, not
+ // container_iandnot, as iandnot frees the original
+ // internally.
+ container_free(container1, typecode1);
+ }
+ } else {
+ container2 = container_iandnot(
+ leaf1->container, leaf1->typecode, leaf2->container,
+ leaf2->typecode, &typecode2);
+ }
+ if (container2 != container1) {  // The leaf is only rewritten when a different container came back.
+ leaf1->container = container2;
+ leaf1->typecode = typecode2;
+ }
+
+ if (!container_nonzero_cardinality(container2, typecode2)) {  // Result is empty: drop the container and the leaf from r1.
+ container_free(container2, typecode2);
+ art_iterator_erase(&r1->art, &it1);
+ free_leaf(leaf1);
+ } else {
+ // Only advance the iterator if we didn't delete the
+ // leaf, as erasing advances by itself.
+ art_iterator_next(&it1);
+ }
+ art_iterator_next(&it2);
+ }
+ }
+ if (!it2_present || compare_result < 0) {
+ // Cases 1 and 2a: it1 is the only iterator or is before it2.
+ art_iterator_next(&it1);  // Nothing to subtract at this key; the leaf stays untouched.
+ } else if (compare_result > 0) {
+ // Case 2c: it1 is after it2.
+ art_iterator_next(&it2);
+ }
+ }
+}
+
+/**
+ * Builds the flipped version of the container stored under `high48`, negating
+ * the low-16-bit range [min, max). The source bitmap is only read.
+ *
+ * If `r` has no leaf under `high48`, the result is a container with exactly
+ * the range [min, max) set. Returns a new leaf owning the new container, or
+ * NULL when the flipped container holds no values.
+ */
+static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r,
+                                   uint8_t high48[], uint32_t min,
+                                   uint32_t max) {
+    leaf_t *existing = (leaf_t *)art_find(&r->art, high48);
+    uint8_t flipped_type;
+    container_t *flipped;
+
+    if (existing == NULL) {
+        // Nothing stored at this key: flipping yields a run of ones.
+        flipped = container_range_of_ones(min, max, &flipped_type);
+    } else if (min == 0 && max > 0xFFFF) {
+        // The range covers the whole container.
+        flipped = container_not(existing->container, existing->typecode,
+                                &flipped_type);
+    } else {
+        // Only part of the container is negated.
+        flipped = container_not_range(existing->container,
+                                      existing->typecode, min, max,
+                                      &flipped_type);
+    }
+
+    if (!container_nonzero_cardinality(flipped, flipped_type)) {
+        container_free(flipped, flipped_type);
+        return NULL;
+    }
+    return create_leaf(flipped, flipped_type);
+}
+
+/**
+ * Negates the low-16-bit range [min, max) of the container stored under
+ * `high48`, modifying `r` directly.
+ *
+ * If `r` has no leaf under `high48`, a new leaf with exactly [min, max) set
+ * is inserted. If the negation leaves the container without values, the leaf
+ * is erased from the ART and freed together with its container.
+ */
+static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[],
+                                        uint32_t min, uint32_t max) {
+    leaf_t *leaf = (leaf_t *)art_find(&r->art, high48);
+    uint8_t flipped_type;
+    container_t *flipped;
+
+    if (leaf == NULL) {
+        // Nothing stored at this key: insert a run of ones.
+        flipped = container_range_of_ones(min, max, &flipped_type);
+        art_insert(&r->art, high48,
+                   (art_val_t *)create_leaf(flipped, flipped_type));
+    } else {
+        if (min == 0 && max > 0xFFFF) {
+            // The range covers the whole container.
+            flipped = container_inot(leaf->container, leaf->typecode,
+                                     &flipped_type);
+        } else {
+            // Only part of the container is negated.
+            flipped = container_inot_range(leaf->container, leaf->typecode,
+                                           min, max, &flipped_type);
+        }
+        leaf->container = flipped;
+        leaf->typecode = flipped_type;
+
+        if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) {
+            // No values left under this key: remove it entirely.
+            art_erase(&r->art, high48);
+            container_free(leaf->container, leaf->typecode);
+            free_leaf(leaf);
+        }
+    }
+}
+
+/**
+ * Returns a new bitmap equal to `r` with the half-open range [min, max)
+ * negated. An empty range (min >= max) yields a plain copy of `r`.
+ */
+roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r,
+                                          uint64_t min, uint64_t max) {
+    // max > min >= 0 here, so max - 1 cannot wrap around.
+    return (min < max) ? roaring64_bitmap_flip_closed(r, min, max - 1)
+                       : roaring64_bitmap_copy(r);
+}
+
+roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1,  // Returns a new bitmap equal to r1 with the closed range [min, max] negated; r1 is only read.
+ uint64_t min, uint64_t max) {
+ if (min > max) {  // Empty range: the result is a plain copy.
+ return roaring64_bitmap_copy(r1);
+ }
+ uint8_t min_high48_key[ART_KEY_BYTES];
+ uint16_t min_low16 = split_key(min, min_high48_key);
+ uint8_t max_high48_key[ART_KEY_BYTES];
+ uint16_t max_low16 = split_key(max, max_high48_key);
+ uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16;  // High 48 bits as an integer, used to count through container keys.
+ uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16;
+
+ roaring64_bitmap_t *r2 = roaring64_bitmap_create();
+ art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true);
+
+ // Copy the containers before min unchanged.
+ while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) {
+ leaf_t *leaf1 = (leaf_t *)it.value;
+ uint8_t typecode2 = leaf1->typecode;
+ container_t *container2 = get_copy_of_container(
+ leaf1->container, &typecode2, /*copy_on_write=*/false);
+ art_insert(&r2->art, it.key,
+ (art_val_t *)create_leaf(container2, typecode2));
+ art_iterator_next(&it);
+ }
+
+ // Flip the range (including non-existent containers!) between min and max.
+ uint64_t high48_bits; for(high48_bits = min_high48_bits; high48_bits <= max_high48_bits;  // One iteration per high-48 key in the range, whether or not r1 has a leaf there.
+ high48_bits++) {
+ uint8_t current_high48_key[ART_KEY_BYTES];
+ split_key(high48_bits << 16, current_high48_key);
+
+ uint32_t min_container = 0;
+ if (high48_bits == min_high48_bits) {  // First container: start at min's low 16 bits.
+ min_container = min_low16;
+ }
+ uint32_t max_container = 0xFFFF + 1; // Exclusive range.
+ if (high48_bits == max_high48_bits) {  // Last container: stop after max's low 16 bits.
+ max_container = max_low16 + 1; // Exclusive.
+ }
+
+ leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key,
+ min_container, max_container);
+ if (leaf != NULL) {  // NULL means the flipped container is empty, so no key is stored.
+ art_insert(&r2->art, current_high48_key, (art_val_t *)leaf);
+ }
+ }
+
+ // Copy the containers after max unchanged.
+ it = art_upper_bound(&r1->art, max_high48_key);
+ while (it.value != NULL) {
+ leaf_t *leaf1 = (leaf_t *)it.value;
+ uint8_t typecode2 = leaf1->typecode;
+ container_t *container2 = get_copy_of_container(
+ leaf1->container, &typecode2, /*copy_on_write=*/false);
+ art_insert(&r2->art, it.key,
+ (art_val_t *)create_leaf(container2, typecode2));
+ art_iterator_next(&it);
+ }
+
+ return r2;
+}
+
+/**
+ * Negates the half-open range [min, max) of `r` in place. An empty range
+ * (min >= max) leaves `r` untouched.
+ */
+void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min,
+                                   uint64_t max) {
+    if (min < max) {
+        // max > min >= 0 here, so max - 1 cannot wrap around.
+        roaring64_bitmap_flip_closed_inplace(r, min, max - 1);
+    }
+}
+
+/**
+ * Negates the closed range [min, max] of `r` in place. An empty range
+ * (min > max) leaves `r` untouched.
+ *
+ * Every high-48-bit key between min and max is visited, including keys for
+ * which `r` currently has no container.
+ */
+void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
+                                          uint64_t max) {
+    if (min > max) {
+        return;
+    }
+    const uint16_t first_low16 = (uint16_t)min;
+    const uint16_t last_low16 = (uint16_t)max;
+    const uint64_t first_bucket = min >> 16;
+    const uint64_t last_bucket = max >> 16;
+
+    uint64_t bucket;
+    for (bucket = first_bucket; bucket <= last_bucket; bucket++) {
+        uint8_t bucket_key[ART_KEY_BYTES];
+        split_key(bucket << 16, bucket_key);
+
+        // Inner containers are flipped entirely; the first and last ones
+        // are clipped to the requested bounds. The upper bound is exclusive.
+        uint32_t from = (bucket == first_bucket) ? first_low16 : 0;
+        uint32_t to_exclusive =
+            (bucket == last_bucket) ? last_low16 + 1 : 0xFFFF + 1;
+
+        roaring64_flip_leaf_inplace(r, bucket_key, from, to_exclusive);
+    }
+}
+
+// Counts how many distinct values the most significant 32 bits take across
+// the keys of the bitmap. Only consecutive keys are compared, so this relies
+// on the iterator yielding equal high-32 prefixes next to each other.
+static inline uint64_t count_high32(const roaring64_bitmap_t *r) {
+    uint64_t distinct = 0;
+    uint32_t last_seen = 0;
+    art_iterator_t cursor = art_init_iterator(&r->art, /*first=*/true);
+    for (; cursor.value != NULL; art_iterator_next(&cursor)) {
+        const uint32_t bucket =
+            (uint32_t)(combine_key(cursor.key, 0) >> 32);
+        if (distinct != 0 && bucket == last_seen) {
+            // Same bucket as the previous key: already counted.
+            continue;
+        }
+        last_seen = bucket;
+        distinct++;
+    }
+    return distinct;
+}
+
+// Releases a 32-bit roaring bitmap while leaving its containers alive: the
+// array is cleared with ra_clear_without_containers, then the bitmap struct
+// itself is freed. Used for temporary 32-bit views over containers owned
+// elsewhere.
+static inline void roaring_bitmap_free_without_containers(roaring_bitmap_t *r) {
+    roaring_array_t *containers_view = &r->high_low_container;
+    ra_clear_without_containers(containers_view);
+    roaring_free(r);
+}
+
+/**
+ * Returns the number of bytes roaring64_bitmap_portable_serialize() writes
+ * for `r`.
+ *
+ * Layout: a uint64 bucket count, then for every bucket (distinct high 32
+ * bits) a uint32 with those high bits followed by a 32-bit roaring bitmap in
+ * portable format. For each bucket a temporary 32-bit bitmap borrowing the
+ * 64-bit bitmap's containers is built and measured; it is released without
+ * freeing the containers.
+ */
+size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) {
+    // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+    size_t size = 0;
+
+    // Write as uint64 the distinct number of "buckets", where a bucket is
+    // defined as the most significant 32 bits of an element.
+    uint64_t high32_count;
+    size += sizeof(high32_count);
+
+    art_iterator_t it = art_init_iterator(&r->art, /*first=*/true);
+    uint32_t prev_high32 = 0;
+    roaring_bitmap_t *bitmap32 = NULL;
+
+    // Iterate through buckets ordered by increasing keys.
+    while (it.value != NULL) {
+        uint64_t current_high48 = combine_key(it.key, 0);
+        uint32_t current_high32 = (uint32_t)(current_high48 >> 32);
+        if (bitmap32 == NULL || prev_high32 != current_high32) {
+            if (bitmap32 != NULL) {
+                // Write as uint32 the most significant 32 bits of the bucket.
+                size += sizeof(prev_high32);
+
+                // Write the 32-bit Roaring bitmaps representing the least
+                // significant bits of a set of elements.
+                size += roaring_bitmap_portable_size_in_bytes(bitmap32);
+                roaring_bitmap_free_without_containers(bitmap32);
+            }
+
+            // Start a new 32-bit bitmap with the current high 32 bits.
+            // Count its containers first so the array is sized once.
+            art_iterator_t it2 = it;
+            uint32_t containers_with_high32 = 0;
+            while (it2.value != NULL &&
+                   (uint32_t)(combine_key(it2.key, 0) >> 32) ==
+                       current_high32) {
+                containers_with_high32++;
+                art_iterator_next(&it2);
+            }
+            bitmap32 =
+                roaring_bitmap_create_with_capacity(containers_with_high32);
+
+            prev_high32 = current_high32;
+        }
+        leaf_t *leaf = (leaf_t *)it.value;
+        // The 32-bit bitmap is keyed by bits 16..31 of the value, exactly as
+        // in roaring64_bitmap_portable_serialize(); previously bits 48..63
+        // were used here.
+        ra_append(&bitmap32->high_low_container,
+                  (uint16_t)(current_high48 >> 16), leaf->container,
+                  leaf->typecode);
+        art_iterator_next(&it);
+    }
+
+    if (bitmap32 != NULL) {
+        // Write as uint32 the most significant 32 bits of the bucket.
+        size += sizeof(prev_high32);
+
+        // Write the 32-bit Roaring bitmaps representing the least
+        // significant bits of a set of elements.
+        size += roaring_bitmap_portable_size_in_bytes(bitmap32);
+        roaring_bitmap_free_without_containers(bitmap32);
+    }
+
+    return size;
+}
+
+/**
+ * Serializes `r` into `buf` using the portable 64-bit roaring format and
+ * returns the number of bytes written (0 when `buf` is NULL).
+ *
+ * Layout: a uint64 bucket count, then for every bucket (distinct high 32
+ * bits) a uint32 with those high bits followed by a 32-bit roaring bitmap in
+ * portable format. The buffer size is not passed in; the caller presumably
+ * sizes it with roaring64_bitmap_portable_size_in_bytes() -- confirm.
+ */
+size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
+                                           char *buf) {
+    // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+    if (buf == NULL) {
+        return 0;
+    }
+    const char *initial_buf = buf;
+
+    // Write as uint64 the distinct number of "buckets", where a bucket is
+    // defined as the most significant 32 bits of an element.
+    uint64_t high32_count = count_high32(r);
+    memcpy(buf, &high32_count, sizeof(high32_count));
+    buf += sizeof(high32_count);
+
+    art_iterator_t it = art_init_iterator(&r->art, /*first=*/true);
+    uint32_t prev_high32 = 0;
+    roaring_bitmap_t *bitmap32 = NULL;
+
+    // Iterate through buckets ordered by increasing keys.
+    while (it.value != NULL) {
+        uint64_t current_high48 = combine_key(it.key, 0);
+        uint32_t current_high32 = (uint32_t)(current_high48 >> 32);
+        if (bitmap32 == NULL || prev_high32 != current_high32) {
+            if (bitmap32 != NULL) {
+                // Write as uint32 the most significant 32 bits of the bucket.
+                memcpy(buf, &prev_high32, sizeof(prev_high32));
+                buf += sizeof(prev_high32);
+
+                // Write the 32-bit Roaring bitmaps representing the least
+                // significant bits of a set of elements.
+                buf += roaring_bitmap_portable_serialize(bitmap32, buf);
+                roaring_bitmap_free_without_containers(bitmap32);
+            }
+
+            // Start a new 32-bit bitmap with the current high 32 bits.
+            // Count the containers of this bucket so the array is sized
+            // once. The key must be shifted down by 32 before comparing with
+            // current_high32; without the shift the low half of the key was
+            // compared and the capacity was miscounted.
+            art_iterator_t it2 = it;
+            uint32_t containers_with_high32 = 0;
+            while (it2.value != NULL &&
+                   (uint32_t)(combine_key(it2.key, 0) >> 32) ==
+                       current_high32) {
+                containers_with_high32++;
+                art_iterator_next(&it2);
+            }
+            bitmap32 =
+                roaring_bitmap_create_with_capacity(containers_with_high32);
+
+            prev_high32 = current_high32;
+        }
+        leaf_t *leaf = (leaf_t *)it.value;
+        // Within a bucket, containers are keyed by bits 16..31 of the value.
+        ra_append(&bitmap32->high_low_container,
+                  (uint16_t)(current_high48 >> 16), leaf->container,
+                  leaf->typecode);
+        art_iterator_next(&it);
+    }
+
+    if (bitmap32 != NULL) {
+        // Write as uint32 the most significant 32 bits of the bucket.
+        memcpy(buf, &prev_high32, sizeof(prev_high32));
+        buf += sizeof(prev_high32);
+
+        // Write the 32-bit Roaring bitmaps representing the least
+        // significant bits of a set of elements.
+        buf += roaring_bitmap_portable_serialize(bitmap32, buf);
+        roaring_bitmap_free_without_containers(bitmap32);
+    }
+
+    return buf - initial_buf;
+}
+
+/**
+ * Walks a portable-format 64-bit roaring bitmap stored in `buf` and returns
+ * how many bytes it occupies, reading at most `maxbytes`.
+ *
+ * Returns 0 when `buf` is NULL, when the data would extend past `maxbytes`,
+ * when the bucket count exceeds UINT32_MAX, or when one of the embedded
+ * 32-bit bitmaps reports a size of 0.
+ */
+size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
+                                                  size_t maxbytes) {
+    // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+    if (buf == NULL) {
+        return 0;
+    }
+
+    // Header: uint64 count of buckets, a bucket being the most significant
+    // 32 bits of an element.
+    uint64_t bucket_count;
+    if (maxbytes < sizeof(bucket_count)) {
+        return 0;
+    }
+    memcpy(&bucket_count, buf, sizeof(bucket_count));
+    const char *cursor = buf + sizeof(bucket_count);
+    size_t consumed = sizeof(bucket_count);
+
+    // The count is a 32-bit quantity stored with 4 bytes of zero padding.
+    if (bucket_count > UINT32_MAX) {
+        return 0;
+    }
+
+    uint64_t remaining;
+    for (remaining = bucket_count; remaining > 0; --remaining) {
+        // Skip the uint32 holding the bucket's high 32 bits.
+        if (consumed + sizeof(uint32_t) > maxbytes) {
+            return 0;
+        }
+        cursor += sizeof(uint32_t);
+        consumed += sizeof(uint32_t);
+
+        // Measure the 32-bit bitmap holding the low bits of the bucket.
+        size_t inner_size = roaring_bitmap_portable_deserialize_size(
+            cursor, maxbytes - consumed);
+        if (inner_size == 0) {
+            return 0;
+        }
+        cursor += inner_size;
+        consumed += inner_size;
+    }
+    return consumed;
+}
+
+roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(  // Builds a 64-bit bitmap from portable-format data, reading at most maxbytes; returns NULL on any failure.
+ const char *buf, size_t maxbytes) {
+ // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
+ if (buf == NULL) {
+ return NULL;
+ }
+ size_t read_bytes = 0;
+
+ // Read as uint64 the distinct number of "buckets", where a bucket is
+ // defined as the most significant 32 bits of an element.
+ uint64_t buckets;
+ if (read_bytes + sizeof(buckets) > maxbytes) {
+ return NULL;
+ }
+ memcpy(&buckets, buf, sizeof(buckets));
+ buf += sizeof(buckets);
+ read_bytes += sizeof(buckets);
+
+ // Buckets should be 32 bits with 4 bytes of zero padding.
+ if (buckets > UINT32_MAX) {
+ return NULL;
+ }
+
+ roaring64_bitmap_t *r = roaring64_bitmap_create();
+ // Iterate through buckets ordered by increasing keys.
+ uint64_t bucket; for(bucket = 0; bucket < buckets; ++bucket) {
+ // Read as uint32 the most significant 32 bits of the bucket.
+ uint32_t high32;
+ if (read_bytes + sizeof(high32) > maxbytes) {
+ roaring64_bitmap_free(r);  // Every failure path below releases the partially built bitmap.
+ return NULL;
+ }
+ memcpy(&high32, buf, sizeof(high32));
+ buf += sizeof(high32);
+ read_bytes += sizeof(high32);
+
+ // Read the 32-bit Roaring bitmaps representing the least significant
+ // bits of a set of elements.
+ size_t bitmap32_size = roaring_bitmap_portable_deserialize_size(  // Measured first so the cursor can be advanced after parsing.
+ buf, maxbytes - read_bytes);
+ if (bitmap32_size == 0) {
+ roaring64_bitmap_free(r);
+ return NULL;
+ }
+
+ roaring_bitmap_t *bitmap32 = roaring_bitmap_portable_deserialize_safe(
+ buf, maxbytes - read_bytes);
+ if (bitmap32 == NULL) {
+ roaring64_bitmap_free(r);
+ return NULL;
+ }
+ buf += bitmap32_size;
+ read_bytes += bitmap32_size;
+
+ // Insert all containers of the 32-bit bitmap into the 64-bit bitmap.
+ uint32_t r32_size = ra_get_size(&bitmap32->high_low_container);
+ size_t i; for(i = 0; i < r32_size; ++i) {
+ uint16_t key16 =
+ ra_get_key_at_index(&bitmap32->high_low_container, (uint16_t)i);
+ uint8_t typecode;
+ container_t *container = ra_get_container_at_index(
+ &bitmap32->high_low_container, (uint16_t)i, &typecode);
+
+ uint64_t high48_bits =  // Bucket bits go to 32..63, the container key to 16..31.
+ (((uint64_t)high32) << 32) | (((uint64_t)key16) << 16);
+ uint8_t high48[ART_KEY_BYTES];
+ split_key(high48_bits, high48);
+ leaf_t *leaf = create_leaf(container, typecode);
+ art_insert(&r->art, high48, (art_val_t *)leaf);  // NOTE(review): high32 values are not checked for duplicates or ordering; confirm what art_insert does when a key repeats.
+ }
+ roaring_bitmap_free_without_containers(bitmap32);  // The containers now belong to r; only the 32-bit wrapper is released.
+ }
+ return r;
+}
+
+/**
+ * Passes the contents of `r` to `iterator`, one container at a time via
+ * container_iterate64, handing `ptr` through unchanged.
+ *
+ * Returns false as soon as container_iterate64 reports false for a
+ * container, true once every container has been visited.
+ */
+bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
+                              roaring_iterator64 iterator, void *ptr) {
+    art_iterator_t cursor = art_init_iterator(&r->art, /*first=*/true);
+    for (; cursor.value != NULL; art_iterator_next(&cursor)) {
+        // Split the container's 48-bit prefix into the upper 32 bits and
+        // the part that lands in the lower 32 bits.
+        const uint64_t key_bits = combine_key(cursor.key, 0);
+        const uint64_t upper = key_bits & 0xFFFFFFFF00000000ULL;
+        const uint32_t lower = key_bits;
+        leaf_t *leaf = (leaf_t *)cursor.value;
+        if (!container_iterate64(leaf->container, leaf->typecode, lower,
+                                 iterator, upper, ptr)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Writes the values of `r` into `out`, starting from the first value and
+ * reading with an effectively unlimited count (UINT64_MAX).
+ *
+ * No capacity is passed in, so `out` presumably must have room for every
+ * value of the bitmap -- confirm with callers.
+ */
+void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
+                                      uint64_t *out) {
+    roaring64_iterator_t cursor = {0};
+    roaring64_iterator_init_at(r, &cursor, /*first=*/true);
+    roaring64_iterator_read(&cursor, out, UINT64_MAX);
+}
+
+/**
+ * Allocates an iterator over `r`, initialized at the first value.
+ *
+ * Returns NULL if the allocation fails (assuming roaring_malloc reports
+ * failure with NULL, as malloc does). The result is released with
+ * roaring64_iterator_free().
+ */
+roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) {
+    roaring64_iterator_t *it =
+        (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t));
+    if (it == NULL) {
+        // Do not hand a NULL iterator to the initializer.
+        return NULL;
+    }
+    return roaring64_iterator_init_at(r, it, /*first=*/true);
+}
+
+/**
+ * Allocates an iterator over `r`, initialized at the last value.
+ *
+ * Returns NULL if the allocation fails (assuming roaring_malloc reports
+ * failure with NULL, as malloc does). The result is released with
+ * roaring64_iterator_free().
+ */
+roaring64_iterator_t *roaring64_iterator_create_last(
+    const roaring64_bitmap_t *r) {
+    roaring64_iterator_t *it =
+        (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t));
+    if (it == NULL) {
+        // Do not hand a NULL iterator to the initializer.
+        return NULL;
+    }
+    return roaring64_iterator_init_at(r, it, /*first=*/false);
+}
+
+/**
+ * Re-targets the existing iterator `it` at bitmap `r`, positioned at the
+ * first value. No allocation takes place; `it` is overwritten.
+ */
+void roaring64_iterator_reinit(const roaring64_bitmap_t *r,
+                               roaring64_iterator_t *it) {
+    (void)roaring64_iterator_init_at(r, it, /*first=*/true);
+}
+
+/**
+ * Re-targets the existing iterator `it` at bitmap `r`, positioned at the
+ * last value. No allocation takes place; `it` is overwritten.
+ */
+void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r,
+                                    roaring64_iterator_t *it) {
+    (void)roaring64_iterator_init_at(r, it, /*first=*/false);
+}
+
+/**
+ * Allocates a new iterator that is a byte-for-byte copy of `it`.
+ *
+ * Returns NULL if the allocation fails (assuming roaring_malloc reports
+ * failure with NULL, as malloc does). The result is released with
+ * roaring64_iterator_free().
+ */
+roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it) {
+    roaring64_iterator_t *new_it =
+        (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t));
+    if (new_it == NULL) {
+        // Copying into a NULL destination would be undefined behavior.
+        return NULL;
+    }
+    memcpy(new_it, it, sizeof(*it));
+    return new_it;
+}
+
+/** Releases the memory of an iterator by passing it to roaring_free(). */
+void roaring64_iterator_free(roaring64_iterator_t *it) {
+    roaring_free(it);
+}
+
+/** Reports whether the iterator currently points at a value. */
+bool roaring64_iterator_has_value(const roaring64_iterator_t *it) {
+    const bool positioned = it->has_value;
+    return positioned;
+}
+
+/**
+ * Returns the value stored in the iterator. The `has_value` flag is not
+ * checked here, so callers presumably test roaring64_iterator_has_value()
+ * first -- confirm.
+ */
+uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) {
+    const uint64_t current = it->value;
+    return current;
+}
+
+/**
+ * Moves the iterator to the next value and returns whether it now points at
+ * one.
+ *
+ * An iterator that is off the tree and not saturated forward is restarted at
+ * the first value. Running off the end marks the iterator as saturated
+ * forward.
+ */
+bool roaring64_iterator_advance(roaring64_iterator_t *it) {
+    if (it->art_it.value == NULL) {
+        // Not positioned on any leaf.
+        if (it->saturated_forward) {
+            it->has_value = false;
+            return false;
+        }
+        roaring64_iterator_init_at(it->parent, it, /*first=*/true);
+        return it->has_value;
+    }
+
+    // First try to step inside the current container.
+    leaf_t *current_leaf = (leaf_t *)it->art_it.value;
+    uint16_t low_bits = (uint16_t)it->value;
+    if (container_iterator_next(current_leaf->container,
+                                current_leaf->typecode, &it->container_it,
+                                &low_bits)) {
+        it->value = it->high48 | low_bits;
+        it->has_value = true;
+        return true;
+    }
+
+    // Container exhausted: continue with the first value of the next leaf.
+    if (art_iterator_next(&it->art_it)) {
+        return roaring64_iterator_init_at_leaf_first(it);
+    }
+
+    it->saturated_forward = true;
+    it->has_value = false;
+    return false;
+}
+
+/**
+ * Moves the iterator to the previous value and returns whether it now points
+ * at one.
+ *
+ * An iterator that is off the tree and saturated forward is restarted at the
+ * last value. Running off the front clears `saturated_forward`, which marks
+ * the iterator as saturated backward.
+ */
+bool roaring64_iterator_previous(roaring64_iterator_t *it) {
+    if (it->art_it.value == NULL) {
+        // Not positioned on any leaf.
+        if (!it->saturated_forward) {
+            // Saturated backward.
+            it->has_value = false;
+            return false;
+        }
+        roaring64_iterator_init_at(it->parent, it, /*first=*/false);
+        return it->has_value;
+    }
+
+    // First try to step back inside the current container.
+    leaf_t *current_leaf = (leaf_t *)it->art_it.value;
+    uint16_t low_bits = (uint16_t)it->value;
+    if (container_iterator_prev(current_leaf->container,
+                                current_leaf->typecode, &it->container_it,
+                                &low_bits)) {
+        it->value = it->high48 | low_bits;
+        it->has_value = true;
+        return true;
+    }
+
+    // Container exhausted: continue with the last value of the previous leaf.
+    if (art_iterator_prev(&it->art_it)) {
+        return roaring64_iterator_init_at_leaf_last(it);
+    }
+
+    it->saturated_forward = false;  // Saturated backward.
+    it->has_value = false;
+    return false;
+}
+
+bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it,  // Positions `it` on the first value >= val; returns false (and saturates forward) when there is none.
+ uint64_t val) {
+ uint8_t val_high48[ART_KEY_BYTES];
+ uint16_t val_low16 = split_key(val, val_high48);
+ if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) {
+ // The ART iterator is before or after the high48 bits of `val` (or
+ // beyond the ART altogether), so we need to move to a leaf with a key
+ // equal or greater.
+ if (!art_iterator_lower_bound(&it->art_it, val_high48)) {
+ // Only smaller keys found.
+ it->saturated_forward = true;
+ return (it->has_value = false);
+ }
+ it->high48 = combine_key(it->art_it.key, 0);  // Refresh the cached prefix for the leaf we landed on.
+ // Fall through to the next if statement.
+ }
+
+ if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) {
+ // We're at equal high bits, check if a suitable value can be found in
+ // this container.
+ leaf_t *leaf = (leaf_t *)it->art_it.value;
+ uint16_t low16 = (uint16_t)it->value;
+ if (container_iterator_lower_bound(leaf->container, leaf->typecode,
+ &it->container_it, &low16,
+ val_low16)) {
+ it->value = it->high48 | low16;  // Recombine the container prefix with the low 16 bits found.
+ return (it->has_value = true);
+ }
+ // Only smaller entries in this container, move to the next.
+ if (!art_iterator_next(&it->art_it)) {
+ it->saturated_forward = true;
+ return (it->has_value = false);
+ }
+ }
+
+ // We're at a leaf with high bits greater than `val`, so the first entry in
+ // this container is our result.
+ return roaring64_iterator_init_at_leaf_first(it);
+}
+
+uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,  // Copies up to `count` values starting at the iterator's position into buf, advancing the iterator; returns how many were written.
+ uint64_t count) {
+ uint64_t consumed = 0;
+ while (it->has_value && consumed < count) {
+ uint32_t container_consumed;
+ leaf_t *leaf = (leaf_t *)it->art_it.value;
+ uint16_t low16 = (uint16_t)it->value;
+ uint32_t container_count = UINT32_MAX;  // The per-container request is 32 bits wide, so clamp the remaining count.
+ if (count - consumed < (uint64_t)UINT32_MAX) {
+ container_count = count - consumed;
+ }
+ bool has_value = container_iterator_read_into_uint64(
+ leaf->container, leaf->typecode, &it->container_it, it->high48, buf,
+ container_count, &container_consumed, &low16);
+ consumed += container_consumed;
+ buf += container_consumed;
+ if (has_value) {  // The container iterator still has a value; the assert below expects the request to be fully satisfied here.
+ it->has_value = true;
+ it->value = it->high48 | low16;
+ assert(consumed == count);
+ return consumed;
+ }
+ it->has_value = art_iterator_next(&it->art_it);  // Container drained: move on to the next leaf, if any.
+ if (it->has_value) {
+ roaring64_iterator_init_at_leaf_first(it);
+ }
+ }
+ return consumed;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace roaring
+} // namespace api
+#endif
+/* end file src/roaring64.c */
/* begin file src/roaring_array.c */
#include <assert.h>
+#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <inttypes.h>
#ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" {
+namespace roaring {
+namespace internal {
#endif
// Convention: [0,ra->size) all elements are initialized
@@ -18639,20 +24818,22 @@ extern "C" { namespace roaring { namespace internal {
extern inline int32_t ra_get_size(const roaring_array_t *ra);
extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
-extern inline container_t *ra_get_container_at_index(
- const roaring_array_t *ra, uint16_t i,
- uint8_t *typecode);
+extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra,
+ uint16_t i,
+ uint8_t *typecode);
extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
uint16_t i);
-extern inline void ra_replace_key_and_container_at_index(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode);
+extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
+ int32_t i,
+ uint16_t key,
+ container_t *c,
+ uint8_t typecode);
-extern inline void ra_set_container_at_index(
- const roaring_array_t *ra, int32_t i,
- container_t *c, uint8_t typecode);
+extern inline void ra_set_container_at_index(const roaring_array_t *ra,
+ int32_t i, container_t *c,
+ uint8_t typecode);
static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
//
@@ -18660,16 +24841,17 @@ static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
// Struct-of-Arrays vs. Array-of-Structs:
// https://github.com/RoaringBitmap/CRoaring/issues/256
- if ( new_capacity == 0 ) {
- roaring_free(ra->containers);
- ra->containers = NULL;
- ra->keys = NULL;
- ra->typecodes = NULL;
- ra->allocation_size = 0;
- return true;
+ if (new_capacity == 0) {
+ roaring_free(ra->containers);
+ ra->containers = NULL;
+ ra->keys = NULL;
+ ra->typecodes = NULL;
+ ra->allocation_size = 0;
+ return true;
}
- const size_t memoryneeded = new_capacity * (
- sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
+ const size_t memoryneeded =
+ new_capacity *
+ (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
void *bigalloc = roaring_malloc(memoryneeded);
if (!bigalloc) return false;
void *oldbigalloc = ra->containers;
@@ -18678,10 +24860,10 @@ static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
assert((char *)(newtypecodes + new_capacity) ==
(char *)bigalloc + memoryneeded);
- if(ra->size > 0) {
- memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size);
- memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
- memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
+ if (ra->size > 0) {
+ memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size);
+ memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
+ memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
}
ra->containers = newcontainers;
ra->keys = newkeys;
@@ -18695,17 +24877,21 @@ bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
if (!new_ra) return false;
ra_init(new_ra);
- if (cap > INT32_MAX) { return false; }
+ // Containers hold 64Ki elements, so 64Ki containers is enough to hold
+ // `0x10000 * 0x10000` (all 2^32) elements
+ if (cap > 0x10000) {
+ cap = 0x10000;
+ }
- if(cap > 0) {
- void *bigalloc = roaring_malloc(cap *
- (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
- if( bigalloc == NULL ) return false;
- new_ra->containers = (container_t **)bigalloc;
- new_ra->keys = (uint16_t *)(new_ra->containers + cap);
- new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
- // Narrowing is safe because of above check
- new_ra->allocation_size = (int32_t)cap;
+ if (cap > 0) {
+ void *bigalloc = roaring_malloc(
+ cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
+ if (bigalloc == NULL) return false;
+ new_ra->containers = (container_t **)bigalloc;
+ new_ra->keys = (uint16_t *)(new_ra->containers + cap);
+ new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
+ // Narrowing is safe because of above check
+ new_ra->allocation_size = (int32_t)cap;
}
return true;
}
@@ -18714,14 +24900,16 @@ int ra_shrink_to_fit(roaring_array_t *ra) {
int savings = (ra->allocation_size - ra->size) *
(sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
if (!realloc_array(ra, ra->size)) {
- return 0;
+ return 0;
}
ra->allocation_size = ra->size;
return savings;
}
void ra_init(roaring_array_t *new_ra) {
- if (!new_ra) { return; }
+ if (!new_ra) {
+ return;
+ }
new_ra->keys = NULL;
new_ra->containers = NULL;
new_ra->typecodes = NULL;
@@ -18734,9 +24922,9 @@ void ra_init(roaring_array_t *new_ra) {
bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
bool copy_on_write) {
ra_clear_containers(dest); // we are going to overwrite them
- if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size
- dest->size = 0; // <--- This is important.
- return true; // output was just cleared, so they match
+ if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size
+ dest->size = 0; // <--- This is important.
+ return true; // output was just cleared, so they match
}
if (dest->allocation_size < source->size) {
if (!realloc_array(dest, source->size)) {
@@ -18747,7 +24935,7 @@ bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
// we go through the containers, turning them into shared containers...
if (copy_on_write) {
- int32_t i; for (i = 0; i < dest->size; ++i) {
+ int32_t i; for(i = 0; i < dest->size; ++i) {
source->containers[i] = get_copy_of_container(
source->containers[i], &source->typecodes[i], copy_on_write);
}
@@ -18759,11 +24947,11 @@ bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
} else {
memcpy(dest->typecodes, source->typecodes,
dest->size * sizeof(uint8_t));
- int32_t i; for (i = 0; i < dest->size; i++) {
+ int32_t i; for(i = 0; i < dest->size; i++) {
dest->containers[i] =
container_clone(source->containers[i], source->typecodes[i]);
if (dest->containers[i] == NULL) {
- int32_t j; for ( j = 0; j < i; j++) {
+ int32_t j; for(j = 0; j < i; j++) {
container_free(dest->containers[j], dest->typecodes[j]);
}
ra_clear_without_containers(dest);
@@ -18775,19 +24963,20 @@ bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
}
void ra_clear_containers(roaring_array_t *ra) {
- int32_t i; for (i = 0; i < ra->size; ++i) {
+ int32_t i; for(i = 0; i < ra->size; ++i) {
container_free(ra->containers[i], ra->typecodes[i]);
}
}
void ra_reset(roaring_array_t *ra) {
- ra_clear_containers(ra);
- ra->size = 0;
- ra_shrink_to_fit(ra);
+ ra_clear_containers(ra);
+ ra->size = 0;
+ ra_shrink_to_fit(ra);
}
void ra_clear_without_containers(roaring_array_t *ra) {
- roaring_free(ra->containers); // keys and typecodes are allocated with containers
+ roaring_free(
+ ra->containers); // keys and typecodes are allocated with containers
ra->size = 0;
ra->allocation_size = 0;
ra->containers = NULL;
@@ -18816,10 +25005,8 @@ bool extend_array(roaring_array_t *ra, int32_t k) {
return true;
}
-void ra_append(
- roaring_array_t *ra, uint16_t key,
- container_t *c, uint8_t typecode
-){
+void ra_append(roaring_array_t *ra, uint16_t key, container_t *c,
+ uint8_t typecode) {
extend_array(ra, 1);
const int32_t pos = ra->size;
@@ -18834,7 +25021,7 @@ void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
extend_array(ra, 1);
const int32_t pos = ra->size;
- // old contents is junk not needing freeing
+ // old contents is junk that does not need freeing
ra->keys[pos] = sa->keys[index];
// the shared container will be in two bitmaps
if (copy_on_write) {
@@ -18852,9 +25039,9 @@ void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t stopping_key, bool copy_on_write) {
- int32_t i; for (i = 0; i < sa->size; ++i) {
+ int32_t i; for(i = 0; i < sa->size; ++i) {
if (sa->keys[i] >= stopping_key) break;
- ra_append_copy(ra, sa, i, copy_on_write);
+ ra_append_copy(ra, sa, (uint16_t)i, copy_on_write);
}
}
@@ -18862,7 +25049,7 @@ void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
int32_t start_index, int32_t end_index,
bool copy_on_write) {
extend_array(ra, end_index - start_index);
- int32_t i; for (i = start_index; i < end_index; ++i) {
+ int32_t i; for(i = start_index; i < end_index; ++i) {
const int32_t pos = ra->size;
ra->keys[pos] = sa->keys[i];
if (copy_on_write) {
@@ -18893,7 +25080,7 @@ void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
int32_t start_index, int32_t end_index) {
extend_array(ra, end_index - start_index);
- int32_t i; for (i = start_index; i < end_index; ++i) {
+ int32_t i; for(i = start_index; i < end_index; ++i) {
const int32_t pos = ra->size;
ra->keys[pos] = sa->keys[i];
@@ -18908,7 +25095,7 @@ void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
bool copy_on_write) {
extend_array(ra, end_index - start_index);
- int32_t i; for (i = start_index; i < end_index; ++i) {
+ int32_t i; for(i = start_index; i < end_index; ++i) {
const int32_t pos = ra->size;
ra->keys[pos] = sa->keys[i];
if (copy_on_write) {
@@ -18925,18 +25112,17 @@ void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
}
}
-container_t *ra_get_container(
- roaring_array_t *ra, uint16_t x, uint8_t *typecode
-){
+container_t *ra_get_container(roaring_array_t *ra, uint16_t x,
+ uint8_t *typecode) {
int i = binarySearch(ra->keys, (int32_t)ra->size, x);
if (i < 0) return NULL;
*typecode = ra->typecodes[i];
return ra->containers[i];
}
-extern inline container_t *ra_get_container_at_index(
- const roaring_array_t *ra, uint16_t i,
- uint8_t *typecode);
+extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra,
+ uint16_t i,
+ uint8_t *typecode);
extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra,
uint16_t i);
@@ -18944,7 +25130,7 @@ extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra,
extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
- int32_t pos);
+ int32_t pos);
// everything skipped over is freed
int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
@@ -18955,10 +25141,8 @@ int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
return pos;
}
-void ra_insert_new_key_value_at(
- roaring_array_t *ra, int32_t i, uint16_t key,
- container_t *c, uint8_t typecode
-){
+void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode) {
extend_array(ra, 1);
// May be an optimization opportunity with DIY memmove
memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
@@ -19027,8 +25211,7 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
}
int32_t srcpos = ra->size - count;
int32_t dstpos = srcpos + distance;
- memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]),
- sizeof(uint16_t) * count);
+ memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count);
memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
sizeof(container_t *) * count);
memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
@@ -19036,10 +25219,9 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
ra->size += distance;
}
-
void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
size_t ctr = 0;
- int32_t i; for (i = 0; i < ra->size; ++i) {
+ int32_t i; for(i = 0; i < ra->size; ++i) {
int num_added = container_to_uint32_array(
ans + ctr, ra->containers[i], ra->typecodes[i],
((uint32_t)ra->keys[i]) << 16);
@@ -19047,7 +25229,8 @@ void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
}
}
-bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset,
+ size_t limit, uint32_t *ans) {
size_t ctr = 0;
size_t dtr = 0;
@@ -19059,10 +25242,9 @@ bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limi
uint32_t *t_ans = NULL;
size_t cur_len = 0;
- int i; for (i = 0; i < ra->size; ++i) {
-
- const container_t *c = container_unwrap_shared(
- ra->containers[i], &ra->typecodes[i]);
+ int i; for(i = 0; i < ra->size; ++i) {
+ const container_t *c =
+ container_unwrap_shared(ra->containers[i], &ra->typecodes[i]);
switch (ra->typecodes[i]) {
case BITSET_CONTAINER_TYPE:
t_limit = (const_CAST_bitset(c))->cardinality;
@@ -19074,25 +25256,28 @@ bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limi
t_limit = run_container_cardinality(const_CAST_run(c));
break;
}
- if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
- if (!first){
- //first_skip = t_limit - (ctr + t_limit - offset);
+ if (ctr + t_limit - 1 >= offset && ctr < offset + limit) {
+ if (!first) {
+ // first_skip = t_limit - (ctr + t_limit - offset);
first_skip = offset - ctr;
first = true;
- t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * (first_skip + limit));
- if(t_ans == NULL) {
- return false;
+ t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) *
+ (first_skip + limit));
+ if (t_ans == NULL) {
+ return false;
}
- memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ;
+ memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit));
cur_len = first_skip + limit;
}
- if (dtr + t_limit > cur_len){
- uint32_t * append_ans = (uint32_t *)roaring_malloc(sizeof(*append_ans) * (cur_len + t_limit));
- if(append_ans == NULL) {
- if(t_ans != NULL) roaring_free(t_ans);
- return false;
+ if (dtr + t_limit > cur_len) {
+ uint32_t *append_ans = (uint32_t *)roaring_malloc(
+ sizeof(*append_ans) * (cur_len + t_limit));
+ if (append_ans == NULL) {
+ if (t_ans != NULL) roaring_free(t_ans);
+ return false;
}
- memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
+ memset(append_ans, 0,
+ sizeof(*append_ans) * (cur_len + t_limit));
cur_len = cur_len + t_limit;
memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
roaring_free(t_ans);
@@ -19100,38 +25285,35 @@ bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limi
}
switch (ra->typecodes[i]) {
case BITSET_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_bitset(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
+ container_to_uint32_array(t_ans + dtr, const_CAST_bitset(c),
+ ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
break;
case ARRAY_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_array(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
+ container_to_uint32_array(t_ans + dtr, const_CAST_array(c),
+ ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
break;
case RUN_CONTAINER_TYPE:
- container_to_uint32_array(
- t_ans + dtr,
- const_CAST_run(c), ra->typecodes[i],
- ((uint32_t)ra->keys[i]) << 16);
+ container_to_uint32_array(t_ans + dtr, const_CAST_run(c),
+ ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
break;
}
dtr += t_limit;
}
ctr += t_limit;
- if (dtr-first_skip >= limit) break;
+ if (dtr - first_skip >= limit) break;
}
- if(t_ans != NULL) {
- memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
- free(t_ans);
+ if (t_ans != NULL) {
+ memcpy(ans, t_ans + first_skip, limit * sizeof(uint32_t));
+ free(t_ans);
}
return true;
}
bool ra_has_run_container(const roaring_array_t *ra) {
- int32_t k; for ( k = 0; k < ra->size; ++k) {
+ int32_t k; for(k = 0; k < ra->size; ++k) {
if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
RUN_CONTAINER_TYPE)
return true;
@@ -19155,7 +25337,7 @@ uint32_t ra_portable_header_size(const roaring_array_t *ra) {
size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
size_t count = ra_portable_header_size(ra);
- int32_t k; for ( k = 0; k < ra->size; ++k) {
+ int32_t k; for(k = 0; k < ra->size; ++k) {
count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
}
return count;
@@ -19167,13 +25349,13 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
uint32_t startOffset = 0;
bool hasrun = ra_has_run_container(ra);
if (hasrun) {
- uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
+ uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16);
memcpy(buf, &cookie, sizeof(cookie));
buf += sizeof(cookie);
uint32_t s = (ra->size + 7) / 8;
uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1);
assert(bitmapOfRunContainers != NULL); // todo: handle
- int32_t i; for (i = 0; i < ra->size; ++i) {
+ int32_t i; for(i = 0; i < ra->size; ++i) {
if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
RUN_CONTAINER_TYPE) {
bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
@@ -19197,19 +25379,20 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
}
- int32_t k; for ( k = 0; k < ra->size; ++k) {
+ int32_t k; for(k = 0; k < ra->size; ++k) {
memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
buf += sizeof(ra->keys[k]);
// get_cardinality returns a value in [1,1<<16], subtracting one
// we get [0,1<<16 - 1] which fits in 16 bits
- uint16_t card = (uint16_t)(
- container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
+ uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k],
+ ra->typecodes[k]) -
+ 1);
memcpy(buf, &card, sizeof(card));
buf += sizeof(card);
}
if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
// writing the containers offsets
- int32_t k; for ( k = 0; k < ra->size; k++) {
+ int32_t k; for(k = 0; k < ra->size; k++) {
memcpy(buf, &startOffset, sizeof(startOffset));
buf += sizeof(startOffset);
startOffset =
@@ -19217,7 +25400,7 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
}
}
- for ( k = 0; k < ra->size; ++k) {
+ for(k = 0; k < ra->size; ++k) {
buf += container_write(ra->containers[k], ra->typecodes[k], buf);
}
return buf - initbuf;
@@ -19231,8 +25414,8 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
// Otherwise, it returns how many bytes are occupied.
//
size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
- size_t bytestotal = sizeof(int32_t);// for cookie
- if(bytestotal > maxbytes) return 0;
+ size_t bytestotal = sizeof(int32_t); // for cookie
+ if (bytestotal > maxbytes) return 0;
uint32_t cookie;
memcpy(&cookie, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
@@ -19246,88 +25429,90 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
size = (cookie >> 16) + 1;
else {
bytestotal += sizeof(int32_t);
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
memcpy(&size, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
}
- if (size > (1<<16)) {
- return 0; // logically impossible
+ if (size > (1 << 16)) {
+ return 0;
}
char *bitmapOfRunContainers = NULL;
bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
if (hasrun) {
int32_t s = (size + 7) / 8;
bytestotal += s;
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
bitmapOfRunContainers = (char *)buf;
buf += s;
}
bytestotal += size * 2 * sizeof(uint16_t);
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
uint16_t *keyscards = (uint16_t *)buf;
buf += size * 2 * sizeof(uint16_t);
if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
// skipping the offsets
bytestotal += size * 4;
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
buf += size * 4;
}
// Reading the containers
- int32_t k; for ( k = 0; k < size; ++k) {
+ int32_t k; for(k = 0; k < size; ++k) {
uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
+ memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp));
uint32_t thiscard = tmp + 1;
bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
bool isrun = false;
- if(hasrun) {
- if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
+ if (hasrun) {
+ if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
}
if (isbitmap) {
- size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ size_t containersize =
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
buf += containersize;
} else if (isrun) {
bytestotal += sizeof(uint16_t);
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
uint16_t n_runs;
memcpy(&n_runs, buf, sizeof(uint16_t));
buf += sizeof(uint16_t);
size_t containersize = n_runs * sizeof(rle16_t);
bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
buf += containersize;
} else {
size_t containersize = thiscard * sizeof(uint16_t);
bytestotal += containersize;
- if(bytestotal > maxbytes) return 0;
+ if (bytestotal > maxbytes) return 0;
buf += containersize;
}
}
return bytestotal;
}
-// this function populates answer from the content of buf (reading up to maxbytes bytes).
-// The function returns false if a properly serialized bitmap cannot be found.
-// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
+// This function populates answer from the content of buf (reading up to
+// maxbytes bytes). The function returns false if a properly serialized bitmap
+// cannot be found. If it returns true, readbytes is populated by how many bytes
+// were read, we have that *readbytes <= maxbytes.
//
// This function is endian-sensitive.
-bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
- *readbytes = sizeof(int32_t);// for cookie
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
- return false;
+bool ra_portable_deserialize(roaring_array_t *answer, const char *buf,
+ const size_t maxbytes, size_t *readbytes) {
+ *readbytes = sizeof(int32_t); // for cookie
+ if (*readbytes > maxbytes) {
+ // Ran out of bytes while reading first 4 bytes.
+ return false;
}
uint32_t cookie;
memcpy(&cookie, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
- fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
- cookie);
+ // "I failed to find one of the right cookies.
return false;
}
int32_t size;
@@ -19336,31 +25521,30 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size = (cookie >> 16) + 1;
else {
*readbytes += sizeof(int32_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
- return false;
+ if (*readbytes > maxbytes) {
+ // Ran out of bytes while reading second part of the cookie.
+ return false;
}
memcpy(&size, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
}
if (size < 0) {
- fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
- size);
- return false; // logically impossible
+ // You cannot have a negative number of containers, the data must be
+ // corrupted.
+ return false;
}
- if (size > (1<<16)) {
- fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
- size);
- return false; // logically impossible
+ if (size > (1 << 16)) {
+ // You cannot have so many containers, the data must be corrupted.
+ return false;
}
const char *bitmapOfRunContainers = NULL;
bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
if (hasrun) {
int32_t s = (size + 7) / 8;
*readbytes += s;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
- return false;
+ if (*readbytes > maxbytes) { // data is corrupted?
+ // Ran out of bytes while reading run bitmap.
+ return false;
}
bitmapOfRunContainers = buf;
buf += s;
@@ -19368,62 +25552,66 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
uint16_t *keyscards = (uint16_t *)buf;
*readbytes += size * 2 * sizeof(uint16_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
- return false;
+ if (*readbytes > maxbytes) {
+ // Ran out of bytes while reading key-cardinality array.
+ return false;
}
buf += size * 2 * sizeof(uint16_t);
bool is_ok = ra_init_with_capacity(answer, size);
if (!is_ok) {
- fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
+ // Failed to allocate memory for roaring array. Bailing out.
return false;
}
- int32_t k; for ( k = 0; k < size; ++k) {
+ int32_t k; for(k = 0; k < size; ++k) {
uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k, sizeof(tmp));
+ memcpy(&tmp, keyscards + 2 * k, sizeof(tmp));
answer->keys[k] = tmp;
}
if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
*readbytes += size * 4;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Ran out of bytes while reading offsets.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (*readbytes > maxbytes) { // data is corrupted?
+ // Ran out of bytes while reading offsets.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
// skipping the offsets
buf += size * 4;
}
// Reading the containers
- for ( k = 0; k < size; ++k) {
+ for(k = 0; k < size; ++k) {
uint16_t tmp;
- memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
+ memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp));
uint32_t thiscard = tmp + 1;
bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
bool isrun = false;
- if(hasrun) {
- if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
- isbitmap = false;
- isrun = true;
- }
+ if (hasrun) {
+ if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
}
if (isbitmap) {
// we check that the read is allowed
- size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ size_t containersize =
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
*readbytes += containersize;
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (*readbytes > maxbytes) {
+ // Running out of bytes while reading a bitset container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
// it is now safe to read
bitset_container_t *c = bitset_container_create();
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (c == NULL) { // memory allocation failure
+ // Failed to allocate memory for a bitset container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
answer->size++;
buf += bitset_container_read(thiscard, c, buf);
@@ -19432,27 +25620,30 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
} else if (isrun) {
// we check that the read is allowed
*readbytes += sizeof(uint16_t);
- if(*readbytes > maxbytes) {
- fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (*readbytes > maxbytes) {
+ // Running out of bytes while reading a run container (header).
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
uint16_t n_runs;
memcpy(&n_runs, buf, sizeof(uint16_t));
size_t containersize = n_runs * sizeof(rle16_t);
*readbytes += containersize;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Running out of bytes while reading a run container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (*readbytes > maxbytes) { // data is corrupted?
+ // Running out of bytes while reading a run container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
// it is now safe to read
run_container_t *c = run_container_create();
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for a run container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (c == NULL) { // memory allocation failure
+ // Failed to allocate memory for a run container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
answer->size++;
buf += run_container_read(thiscard, c, buf);
@@ -19462,18 +25653,20 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
// we check that the read is allowed
size_t containersize = thiscard * sizeof(uint16_t);
*readbytes += containersize;
- if(*readbytes > maxbytes) {// data is corrupted?
- fprintf(stderr, "Running out of bytes while reading an array container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (*readbytes > maxbytes) { // data is corrupted?
+ // Running out of bytes while reading an array container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
// it is now safe to read
array_container_t *c =
array_container_create_given_capacity(thiscard);
- if(c == NULL) {// memory allocation failure
- fprintf(stderr, "Failed to allocate memory for an array container.\n");
- ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
- return false;
+ if (c == NULL) { // memory allocation failure
+ // Failed to allocate memory for an array container.
+ ra_clear(answer); // we need to clear the containers already
+ // allocated, and the roaring array
+ return false;
}
answer->size++;
buf += array_container_read(thiscard, c, buf);
@@ -19485,16 +25678,19 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
+}
+}
+} // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/roaring_array.c */
/* begin file src/roaring_priority_queue.c */
-
#ifdef __cplusplus
using namespace ::roaring::internal;
-extern "C" { namespace roaring { namespace api {
+extern "C" {
+namespace roaring {
+namespace api {
#endif
struct roaring_pq_element_s {
@@ -19529,9 +25725,7 @@ static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) {
pq->elements[i] = *t;
}
-static void pq_free(roaring_pq_t *pq) {
- roaring_free(pq);
-}
+static void pq_free(roaring_pq_t *pq) { roaring_free(pq); }
static void percolate_down(roaring_pq_t *pq, uint32_t i) {
uint32_t size = (uint32_t)pq->size;
@@ -19557,21 +25751,20 @@ static void percolate_down(roaring_pq_t *pq, uint32_t i) {
}
static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) {
- size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length;
+ size_t alloc_size =
+ sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length;
roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size);
answer->elements = (roaring_pq_element_t *)(answer + 1);
answer->size = length;
- uint32_t i; for ( i = 0; i < length; i++) {
+ uint32_t i; for(i = 0; i < length; i++) {
answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i];
answer->elements[i].is_temporary = false;
answer->elements[i].size =
roaring_bitmap_portable_size_in_bytes(arr[i]);
}
- {
- int i;
- for (i = (length >> 1); i >= 0; i--) {
- percolate_down(answer, i);
- }
+ int32_t j;
+ for(j = (length >> 1); j >= 0; j--) {
+ percolate_down(answer, j);
}
return answer;
}
@@ -19605,29 +25798,30 @@ static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
int pos1 = 0, pos2 = 0;
uint8_t type1, type2;
- uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
while (true) {
if (s1 == s2) {
// todo: unsharing can be inefficient as it may create a clone where
// none
// is needed, but it has the benefit of being easy to reason about.
- ra_unshare_container_at_index(&x1->high_low_container, pos1);
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ ra_unshare_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
assert(type1 != SHARED_CONTAINER_TYPE);
- ra_unshare_container_at_index(&x2->high_low_container, pos2);
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ ra_unshare_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
assert(type2 != SHARED_CONTAINER_TYPE);
container_t *c;
if ((type2 == BITSET_CONTAINER_TYPE) &&
- (type1 != BITSET_CONTAINER_TYPE)
- ){
+ (type1 != BITSET_CONTAINER_TYPE)) {
c = container_lazy_ior(c2, type2, c1, type1, &result_type);
container_free(c1, type1);
if (c != c2) {
@@ -19648,24 +25842,24 @@ static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
++pos2;
if (pos1 == length1) break;
if (pos2 == length2) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
} else if (s1 < s2) { // s1 < s2
- container_t *c1 = ra_get_container_at_index(
- &x1->high_low_container, pos1, &type1);
+ container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
+ (uint16_t)pos1, &type1);
ra_append(&answer->high_low_container, s1, c1, type1);
pos1++;
if (pos1 == length1) break;
- s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
} else { // s1 > s2
- container_t *c2 = ra_get_container_at_index(
- &x2->high_low_container, pos2, &type2);
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ (uint16_t)pos2, &type2);
ra_append(&answer->high_low_container, s2, c2, type2);
pos2++;
if (pos2 == length2) break;
- s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
}
}
if (pos1 == length1) {
@@ -19738,6 +25932,9 @@ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
}
#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace api {
+}
+}
+} // extern "C" { namespace roaring { namespace api {
#endif
/* end file src/roaring_priority_queue.c */
+#endif /* USE_ROARING_V2 */
diff --git a/src/lib/third_party/src/roaring_v2.c b/src/lib/third_party/src/roaring_v2.c
new file mode 100644
index 000000000..1349f2b04
--- /dev/null
+++ b/src/lib/third_party/src/roaring_v2.c
@@ -0,0 +1,19747 @@
+#include "ndpi_config.h"
+
+#ifdef USE_ROARING_V2
+// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
+// Created by amalgamation.sh on 2023-02-12T11:34:02Z
+
+/*
+ * The CRoaring project is under a dual license (Apache/MIT).
+ * Users of the library may choose one or the other license.
+ */
+/*
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/*
+ * MIT License
+ *
+ * Copyright 2016-2022 The CRoaring authors
+ *
+ * Permission is hereby granted, free of charge, to any
+ * person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software
+ * is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+ * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+ * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "roaring.h"
+
+/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
+#ifdef DMALLOC
+#include "dmalloc.h"
+#endif
+
+#include "roaring.h" /* include public API definitions */
+/* begin file include/roaring/portability.h */
+/*
+ * portability.h
+ *
+ */
+
+ /**
+ * All macros should be prefixed with either CROARING or ROARING.
+ * The library uses both ROARING_...
+ * as well as CROARING_ as prefixes. The ROARING_ prefix is for
+ * macros that are provided by the build system or that are closely
+ * related to the format. The header macros may also use ROARING_.
+ * The CROARING_ prefix is for internal macros that a user is unlikely
+ * to ever interact with.
+ */
+
+#ifndef INCLUDE_PORTABILITY_H_
+#define INCLUDE_PORTABILITY_H_
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS 1
+#endif // __STDC_FORMAT_MACROS
+
+#ifdef _MSC_VER
+#define CROARING_VISUAL_STUDIO 1
+/**
+ * We want to differentiate carefully between
+ * clang under visual studio and regular visual
+ * studio.
+ */
+#ifdef __clang__
+// clang under visual studio
+#define CROARING_CLANG_VISUAL_STUDIO 1
+#else
+// just regular visual studio (best guess)
+#define CROARING_REGULAR_VISUAL_STUDIO 1
+#endif // __clang__
+#endif // _MSC_VER
+
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#undef _POSIX_C_SOURCE
+#endif
+
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
+#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+#define _XOPEN_SOURCE 700
+#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h> // will provide posix_memalign with _POSIX_C_SOURCE as defined above
+#ifdef __GLIBC__
+#include <malloc.h> // this should never be needed but there are some reports that it is needed.
+#endif
+
+#ifdef __cplusplus
+extern "C" { // portability definitions are in global scope, not a namespace
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT)
+#pragma message( \
+ "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.")
+#endif
+
+#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
+#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#define __restrict__ __restrict
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+
+
+#if defined(__x86_64__) || defined(_M_X64)
+// we have an x64 processor
+#define CROARING_IS_X64 1
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910)
+// Old visual studio systems won't support AVX2 well.
+#undef CROARING_IS_X64
+#endif
+
+#if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__)
+// Older versions of clang have a bug affecting us
+// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
+#undef CROARING_IS_X64
+#endif
+
+#ifdef CROARING_DISABLE_X64
+#undef CROARING_IS_X64
+#endif
+
+
+#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
+ /* RH 7 don't have atomic includes */
+#undef CROARING_IS_X64
+#undef ALLOW_UNALIGNED
+#define ALLOW_UNALIGNED
+#endif
+
+
+
+// we include the intrinsic header
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler */
+#include <x86intrin.h> // on some recent GCC, this will declare posix_memalign
+
+
+
+#ifdef CROARING_CLANG_VISUAL_STUDIO
+
+/**
+ * You are not supposed, normally, to include these
+ * headers directly. Instead you should either include intrin.h
+ * or x86intrin.h. However, when compiling with clang
+ * under Windows (i.e., when _MSC_VER is set), these headers
+ * only get included *if* the corresponding features are detected
+ * from macros:
+ * e.g., if __AVX2__ is set... in turn, we normally set these
+ * macros by compiling against the corresponding architecture
+ * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
+ * software with these advanced instructions. These headers would
+ * normally guard against such usage, but we carefully included
+ * <x86intrin.h> (or <intrin.h>) before, so the headers
+ * are fooled.
+ */
+#include <bmiintrin.h> // for _blsr_u64
+#include <lzcntintrin.h> // for __lzcnt64
+#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <wmmintrin.h>
+// unfortunately, we may not get _blsr_u64, but, thankfully, clang
+// has it as a macro.
+#ifndef _blsr_u64
+// we roll our own
+#define _blsr_u64(n) ((n - 1) & n)
+#endif // _blsr_u64
+#endif // CROARING_CLANG_VISUAL_STUDIO
+
+
+#endif // !CROARING_REGULAR_VISUAL_STUDIO
+#endif // defined(__x86_64__) || defined(_M_X64)
+
+#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
+# define USENEON
+#endif
+#if defined(USENEON)
+# include <arm_neon.h>
+#endif
+
+#if !CROARING_REGULAR_VISUAL_STUDIO
+/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
+ * assembly */
+#define CROARING_INLINE_ASM 1
+#endif // !CROARING_REGULAR_VISUAL_STUDIO
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+/* Microsoft C/C++-compatible compiler */
+#include <intrin.h>
+
+#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
+ // intrinsics are defined!!!
+// sadly there is no way to check whether we are missing these intrinsics
+// specifically.
+
+/* wrappers for Visual Studio built-ins that look like gcc built-ins */
+/*
+ * Visual Studio stand-in for the GCC builtin of the same name: index of the
+ * lowest set bit of input_num (i.e. the count of trailing zero bits).
+ * The result might be undefined when input_num is zero.
+ */
+inline int __builtin_ctzll(unsigned long long input_num) {
+    unsigned long bit_pos;
+#ifdef _WIN64  // highly recommended!!!
+    _BitScanForward64(&bit_pos, input_num);
+#else  // if we must support 32-bit Windows
+    const uint32_t low_half = (uint32_t)input_num;
+    if (low_half == 0) {
+        // nothing set in the low word: scan the high word and offset by 32
+        _BitScanForward(&bit_pos, (uint32_t)(input_num >> 32));
+        bit_pos += 32;
+    } else {
+        _BitScanForward(&bit_pos, low_half);
+    }
+#endif
+    return bit_pos;
+}
+
+/*
+ * Visual Studio stand-in for the GCC builtin of the same name: count of
+ * leading zero bits of input_num, computed as 63 minus the index of the
+ * highest set bit.
+ * The result might be undefined when input_num is zero.
+ */
+inline int __builtin_clzll(unsigned long long input_num) {
+    unsigned long top_bit;
+#ifdef _WIN64  // highly recommended!!!
+    _BitScanReverse64(&top_bit, input_num);
+#else  // if we must support 32-bit Windows
+    if (input_num <= 0xFFFFFFFF) {
+        // value fits in the low word
+        _BitScanReverse(&top_bit, (uint32_t)(input_num));
+    } else {
+        // highest bit lives in the high word: scan it and offset by 32
+        _BitScanReverse(&top_bit, (uint32_t)(input_num >> 32));
+        top_bit += 32;
+    }
+#endif
+    return 63 - top_bit;
+}
+
+
+/* software implementation avoids POPCNT */
+/*static inline int __builtin_popcountll(unsigned long long input_num) {
+ const uint64_t m1 = 0x5555555555555555; //binary: 0101...
+ const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
+ const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ...
+ const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
+
+ input_num -= (input_num >> 1) & m1;
+ input_num = (input_num & m2) + ((input_num >> 2) & m2);
+ input_num = (input_num + (input_num >> 4)) & m4;
+ return (input_num * h01) >> 56;
+}*/
+
+/* Use #define so this is effective even under /Ob0 (no inline) */
+#define __builtin_unreachable() __assume(0)
+#endif
+
+#endif
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+#define ALIGNED(x) __declspec(align(x))
+#elif defined(__GNUC__) || defined(__clang__)
+#define ALIGNED(x) __attribute__((aligned(x)))
+#else
+#warning "Warning. Unrecognized compiler."
+#define ALIGNED(x)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define WARN_UNUSED __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED
+#endif
+
+#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
+
+#ifdef USENEON
+// we can always compute the popcount fast.
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
+// we will need this function:
+// Software population count of a 64-bit word, used where no popcount
+// intrinsic is available. Sums bits in progressively wider fields.
+static inline int hammingbackup(uint64_t x) {
+    const uint64_t pairs_mask = UINT64_C(0x5555555555555555);
+    const uint64_t quads_mask = UINT64_C(0x3333333333333333);
+    const uint64_t bytes_mask = UINT64_C(0x0F0F0F0F0F0F0F0F);
+    // each 2-bit field now holds the count of its two bits
+    x = x - ((x >> 1) & pairs_mask);
+    // each 4-bit field now holds the count of its four bits
+    x = (x & quads_mask) + ((x >> 2) & quads_mask);
+    // each byte now holds the count of its eight bits
+    x = (x + (x >> 4)) & bytes_mask;
+    // the multiply accumulates all byte counts into the top byte
+    x = x * UINT64_C(0x0101010101010101);
+    return x >> 56;
+}
+#endif
+
+
+/*
+ * Returns the number of bits set in x (population count).
+ *
+ * The implementation is chosen at compile time:
+ *  - 64-bit regular Visual Studio: NEON intrinsics when USENEON is defined,
+ *    hammingbackup() on ARM64, __popcnt64 otherwise;
+ *  - 32-bit regular Visual Studio: hammingbackup() on ARM, otherwise two
+ *    32-bit __popcnt calls (low and high halves);
+ *  - every other compiler: __builtin_popcountll.
+ */
+static inline int hamming(uint64_t x) {
+#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
+#ifdef USENEON
+    // per-byte popcount followed by a horizontal add of the eight bytes;
+    // the operand must be the parameter x (there is no input_num here)
+    return vaddv_u8(vcnt_u8(vcreate_u8(x)));
+#elif defined(_M_ARM64)
+    return hammingbackup(x);
+    // (int) _CountOneBits64(x); is unavailable
+#else  // _M_ARM64
+    return (int) __popcnt64(x);
+#endif // _M_ARM64
+#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
+#ifdef _M_ARM
+    return hammingbackup(x);
+    // _CountOneBits is unavailable
+#else // _M_ARM
+    return (int) __popcnt(( unsigned int)x) + (int) __popcnt(( unsigned int)(x>>32));
+#endif // _M_ARM
+#else
+    return __builtin_popcountll(x);
+#endif
+}
+
+#ifndef UINT64_C
+#define UINT64_C(c) (c##ULL)
+#endif // UINT64_C
+
+#ifndef UINT32_C
+#define UINT32_C(c) (c##UL)
+#endif // UINT32_C
+
+#ifdef __cplusplus
+} // extern "C" {
+#endif // __cplusplus
+
+
+// this is almost standard?
+#undef STRINGIFY_IMPLEMENTATION_
+#undef STRINGIFY
+#define STRINGIFY_IMPLEMENTATION_(a) #a
+#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)
+
+// Our fast kernels require 64-bit systems.
+//
+// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
+// Furthermore, the number of SIMD registers is reduced.
+//
+// On 32-bit ARM, we would have smaller registers.
+//
+// The library should still have the fallback kernel. It is
+// slower, but it should run everywhere.
+
+//
+// Enable valid runtime implementations, and select CROARING_BUILTIN_IMPLEMENTATION
+//
+
+// We are going to use runtime dispatch.
+#ifdef CROARING_IS_X64
+#ifdef __clang__
+// clang does not have GCC push pop
+// warning: clang attribute push can't be used within a namespace in clang up
+// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be *outside* of a
+// namespace.
+#define CROARING_TARGET_REGION(T) \
+ _Pragma(STRINGIFY( \
+ clang attribute push(__attribute__((target(T))), apply_to = function)))
+#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop")
+#elif defined(__GNUC__)
+// GCC is easier
+#define CROARING_TARGET_REGION(T) \
+ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
+#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options")
+#endif // clang then gcc
+
+#endif // CROARING_IS_X64
+
+// Default target region macros don't do anything.
+#ifndef CROARING_TARGET_REGION
+#define CROARING_TARGET_REGION(T)
+#define CROARING_UNTARGET_REGION
+#endif
+
+#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
+
+#ifdef __AVX2__
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX2
+#define CROARING_TARGET_AVX2
+#undef CROARING_UNTARGET_REGION
+#define CROARING_UNTARGET_REGION
+#endif
+
+#ifndef ALLOW_UNALIGNED
+// Allow unaligned memory access
+#if defined(__GNUC__) || defined(__clang__)
+#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
+#else
+#define ALLOW_UNALIGNED
+#endif
+#endif
+
+// Define CROARING_IS_BIG_ENDIAN as 1 on big-endian targets and 0 otherwise.
+// Preference order: the compiler's own __BYTE_ORDER__ macros, then the
+// Windows assumption (little endian), then whatever the platform endian
+// header provides. If the byte order is still unknown, assume little endian.
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+  #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(_WIN32)
+  #define CROARING_IS_BIG_ENDIAN 0
+#else
+  #if defined(__APPLE__) || defined(__FreeBSD__)
+    #include <machine/endian.h>
+  #elif defined(sun) || defined(__sun)
+    #include <sys/byteorder.h>
+  #else // neither Apple/FreeBSD nor Solaris
+
+    #ifdef __has_include
+      #if __has_include(<endian.h>)
+        #include <endian.h>
+      #endif //__has_include(<endian.h>)
+    #endif //__has_include
+
+  #endif // platform endian header
+
+
+  // A single #if/#elif/#else chain so the macro is defined exactly once.
+  // (#ifndef only accepts one identifier, so the test must be a plain #if.)
+  #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+    #define CROARING_IS_BIG_ENDIAN 0
+  #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    #define CROARING_IS_BIG_ENDIAN 0
+  #else // __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+    #define CROARING_IS_BIG_ENDIAN 1
+  #endif // byte-order selection
+#endif
+
+// We need portability.h to be included first,
+// but we also always want isadetection.h to be
+// included (right after).
+// See https://github.com/RoaringBitmap/CRoaring/issues/394
+// There is no scenario where we want portability.h to
+// be included, but not isadetection.h: the latter is a
+// strict requirement.
+#endif /* INCLUDE_PORTABILITY_H_ */
+/* end file include/roaring/portability.h */
+/* begin file include/roaring/isadetection.h */
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
+America and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ROARING_ISADETECTION_H
+#define ROARING_ISADETECTION_H
+
+// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H).
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+// We need portability.h to be included first, see
+// https://github.com/RoaringBitmap/CRoaring/issues/394
+#if CROARING_REGULAR_VISUAL_STUDIO
+#include <intrin.h>
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+#include <cpuid.h>
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+
+
+enum croaring_instruction_set {
+ CROARING_DEFAULT = 0x0,
+ CROARING_NEON = 0x1,
+ CROARING_AVX2 = 0x4,
+ CROARING_SSE42 = 0x8,
+ CROARING_PCLMULQDQ = 0x10,
+ CROARING_BMI1 = 0x20,
+ CROARING_BMI2 = 0x40,
+ CROARING_ALTIVEC = 0x80,
+ CROARING_UNINITIALIZED = 0x8000
+};
+
+#if defined(__PPC64__)
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_ALTIVEC;
+//}
+
+#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
+
+#if defined(__ARM_NEON)
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_NEON;
+//}
+
+#else // ARM without NEON
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_DEFAULT;
+//}
+
+#endif
+
+#elif defined(__x86_64__) || defined(_M_AMD64) // x64
+
+
+
+
+/*
+ * Thin wrapper around the x86 CPUID instruction.
+ *
+ * On entry *eax holds the leaf to query; on return the four pointers receive
+ * the resulting register values. Three compile-time paths exist:
+ *  - regular Visual Studio: __cpuid(), which is given only the leaf (*eax);
+ *  - GCC helper (HAVE_GCC_GET_CPUID && USE_GCC_GET_CPUID): __get_cpuid();
+ *  - otherwise: inline assembly, where both *eax and *ecx are inputs.
+ * NOTE(review): only the inline-assembly path visibly feeds the incoming *ecx
+ * to the instruction; whether the other two paths honour a sub-leaf is not
+ * visible here - confirm before relying on it.
+ */
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
+ uint32_t *edx) {
+
+#if CROARING_REGULAR_VISUAL_STUDIO
+ int cpu_info[4];
+ __cpuid(cpu_info, *eax);
+ *eax = cpu_info[0];
+ *ebx = cpu_info[1];
+ *ecx = cpu_info[2];
+ *edx = cpu_info[3];
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+ uint32_t level = *eax;
+ __get_cpuid(level, eax, ebx, ecx, edx);
+#else
+ // a and c are read-write ("+") operands: they carry the inputs in and the
+ // results out; b and d are write-only ("=") outputs.
+ uint32_t a = *eax, b, c = *ecx, d;
+ __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
+ *eax = a;
+ *ebx = b;
+ *ecx = c;
+ *edx = d;
+#endif
+}
+
+/*
+ * Queries CPUID and returns a bitmask of croaring_instruction_set flags
+ * (CROARING_AVX2, CROARING_BMI1, CROARING_BMI2, CROARING_SSE42,
+ * CROARING_PCLMULQDQ) for the features the host reports.
+ * Every call executes CPUID twice; nothing is cached here.
+ */
+static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t host_isa = 0x0;
+ // Can be found on Intel ISA Reference for CPUID
+ static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7
+ static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7
+ static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1
+ static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
+ // Leaf EAX=0x7 with ECX=0x0 as input: the feature bits are read from EBX
+ eax = 0x7;
+ ecx = 0x0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx & cpuid_avx2_bit) {
+ host_isa |= CROARING_AVX2;
+ }
+ if (ebx & cpuid_bmi1_bit) {
+ host_isa |= CROARING_BMI1;
+ }
+
+ if (ebx & cpuid_bmi2_bit) {
+ host_isa |= CROARING_BMI2;
+ }
+
+ // Leaf EAX=0x1: the feature bits are read from ECX
+ // (ecx is not reset here; it still holds the value left by the call above)
+ eax = 0x1;
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (ecx & cpuid_sse42_bit) {
+ host_isa |= CROARING_SSE42;
+ }
+
+ if (ecx & cpuid_pclmulqdq_bit) {
+ host_isa |= CROARING_PCLMULQDQ;
+ }
+
+ return host_isa;
+}
+#else // fallback
+
+
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+// return CROARING_DEFAULT;
+//}
+
+
+#endif // end SIMD extension detection code
+
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+// croaring_detect_supported_architectures(): returns the instruction-set
+// bitmask produced by dynamic_croaring_detect_supported_architectures().
+// Exactly one of the four definitions below is compiled, chosen by language
+// and compiler; they differ only in how (or whether) the result is cached.
+#if defined(__cplusplus)
+// C++ build: cached in a function-local static initialised on first call.
+static inline uint32_t croaring_detect_supported_architectures() {
+ // thread-safe as per the C++11 standard.
+ static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
+ return buffer;
+}
+#elif CROARING_VISUAL_STUDIO
+// Visual Studio does not support C11 atomics.
+// The cache is a plain static int with CROARING_UNINITIALIZED as the
+// "not yet computed" sentinel; no synchronization is visible here.
+static inline uint32_t croaring_detect_supported_architectures() {
+ static int buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#else // neither C++ nor Visual Studio: plain C with GCC/clang
+
+
+#if (defined(__GNUC_RH_RELEASE__) && (__GNUC_RH_RELEASE__ != 5)) || (__GNUC__ < 5)
+// Old toolchain path: AVX is disabled via ROARING_DISABLE_AVX and the
+// detection result is not cached (detection runs on every call).
+#define ROARING_DISABLE_AVX
+#undef __AVX2__
+/* CentOS 7 */
+static inline uint32_t croaring_detect_supported_architectures() {
+ return(dynamic_croaring_detect_supported_architectures());
+}
+#else
+
+#include <stdatomic.h>
+// C11 path: cached in an _Atomic static, CROARING_UNINITIALIZED as sentinel.
+static inline uint32_t croaring_detect_supported_architectures() {
+ // we use an atomic for thread safety
+ static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
+ if (buffer == CROARING_UNINITIALIZED) {
+ // atomicity is sufficient
+ buffer = dynamic_croaring_detect_supported_architectures();
+ }
+ return buffer;
+}
+#endif
+#endif // __cplusplus / CROARING_VISUAL_STUDIO
+
+// croaring_avx2(): tells callers whether the AVX2 code paths may be used.
+// Exactly one definition is compiled:
+//  - ROARING_DISABLE_AVX defined: always false;
+//  - compiled with __AVX2__: always true (no runtime check);
+//  - otherwise: runtime check of the CROARING_AVX2 bit from
+//    croaring_detect_supported_architectures().
+#ifdef ROARING_DISABLE_AVX
+static inline bool croaring_avx2() {
+ return false;
+}
+#elif defined(__AVX2__)
+static inline bool croaring_avx2() {
+ return true;
+}
+#else
+static inline bool croaring_avx2() {
+ return (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
+}
+#endif
+
+
+#else // defined(__x86_64__) || defined(_M_AMD64) // x64
+
+//static inline bool croaring_avx2() {
+// return false;
+//}
+
+//static inline uint32_t croaring_detect_supported_architectures() {
+// // no runtime dispatch
+// return dynamic_croaring_detect_supported_architectures();
+//}
+#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#endif // ROARING_ISADETECTION_H
+/* end file include/roaring/isadetection.h */
+/* begin file include/roaring/containers/perfparameters.h */
+#ifndef PERFPARAMETERS_H_
+#define PERFPARAMETERS_H_
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/**
+During lazy computations, we can transform array containers into bitset
+containers as
+long as we can expect them to have ARRAY_LAZY_LOWERBOUND values.
+*/
+enum { ARRAY_LAZY_LOWERBOUND = 1024 };
+
+/* default initial size of a run container
+ setting it to zero delays the malloc.*/
+enum { RUN_DEFAULT_INIT_SIZE = 0 };
+
+/* default initial size of an array container
+ setting it to zero delays the malloc */
+enum { ARRAY_DEFAULT_INIT_SIZE = 0 };
+
+/* automatic bitset conversion during lazy or */
+#ifndef LAZY_OR_BITSET_CONVERSION
+#define LAZY_OR_BITSET_CONVERSION true
+#endif
+
+/* automatically attempt to convert a bitset to a full run during lazy
+ * evaluation */
+#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL
+#define LAZY_OR_BITSET_CONVERSION_TO_FULL true
+#endif
+
+/* automatically attempt to convert a bitset to a full run */
+#ifndef OR_BITSET_CONVERSION_TO_FULL
+#define OR_BITSET_CONVERSION_TO_FULL true
+#endif
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif
+/* end file include/roaring/containers/perfparameters.h */
+/* begin file include/roaring/containers/container_defs.h */
+/*
+ * container_defs.h
+ *
+ * Unlike containers.h (which is a file aggregating all the container includes,
+ * like array.h, bitset.h, and run.h) this is a file included BY those headers
+ * to do things like define the container base class `container_t`.
+ */
+
+#ifndef INCLUDE_CONTAINERS_CONTAINER_DEFS_H_
+#define INCLUDE_CONTAINERS_CONTAINER_DEFS_H_
+
+#ifdef __cplusplus
+ #include <type_traits> // used by casting helper for compile-time check
+#endif
+
+// The preferences are a separate file to separate out tweakable parameters
+
+#ifdef __cplusplus
+namespace roaring { namespace internal { // No extern "C" (contains template)
+#endif
+
+
+/*
+ * Since roaring_array_t's definition is not opaque, the container type is
+ * part of the API. If it's not going to be `void*` then it needs a name, and
+ * expectations are to prefix C library-exported names with `roaring_` etc.
+ *
+ * Rather than force the whole codebase to use the name `roaring_container_t`,
+ * the few API appearances use the macro ROARING_CONTAINER_T. Those includes
+ * are prior to containers.h, so make a short private alias of `container_t`.
+ * Then undefine the awkward macro so it's not used any more than it has to be.
+ */
+typedef ROARING_CONTAINER_T container_t;
+#undef ROARING_CONTAINER_T
+
+
+/*
+ * See ROARING_CONTAINER_T for notes on using container_t as a base class.
+ * This macro helps make the following pattern look nicer:
+ *
+ * #ifdef __cplusplus
+ * struct roaring_array_s : public container_t {
+ * #else
+ * struct roaring_array_s {
+ * #endif
+ * int32_t cardinality;
+ * int32_t capacity;
+ * uint16_t *array;
+ * }
+ */
+#if defined(__cplusplus)
+ #define STRUCT_CONTAINER(name) \
+ struct name : public container_t /* { ... } */
+#else
+ #define STRUCT_CONTAINER(name) \
+ struct name /* { ... } */
+#endif
+
+
+/**
+ * Since container_t* is not void* in C++, "dangerous" casts are not needed to
+ * downcast; only a static_cast<> is needed. Define a macro for static casting
+ * which helps make casts more visible, and catches problems at compile-time
+ * when building the C sources in C++ mode:
+ *
+ * void some_func(container_t **c, ...) { // double pointer, not single
+ * array_container_t *ac1 = (array_container_t *)(c); // uncaught!!
+ *
+ * array_container_t *ac2 = CAST(array_container_t *, c) // C++ errors
+ * array_container_t *ac3 = CAST_array(c); // shorthand for #2, errors
+ * }
+ *
+ * Trickier to do is a cast from `container**` to `array_container_t**`. This
+ * needs a reinterpret_cast<>, which sacrifices safety...so a template is used
+ * leveraging <type_traits> to make sure it's legal in the C++ build.
+ */
+#ifdef __cplusplus
+ #define CAST(type,value) static_cast<type>(value)
+ #define movable_CAST(type,value) movable_CAST_HELPER<type>(value)
+
+    // Converts a Base** into the requested Derived** type (PPDerived).
+    // The conversion itself is an unchecked reinterpret_cast; the
+    // static_assert rejects, at compile time, any target whose pointee
+    // type does not derive from Base.
+    template<typename PPDerived, typename Base>
+    PPDerived movable_CAST_HELPER(Base **ptr_to_ptr) {
+        // peel the two pointer levels off PPDerived to reach the class type
+        using DerivedPtr = typename std::remove_pointer<PPDerived>::type;
+        using DerivedType = typename std::remove_pointer<DerivedPtr>::type;
+        static_assert(std::is_base_of<Base, DerivedType>::value,
+                      "use movable_CAST() for container_t** => xxx_container_t**");
+        return reinterpret_cast<DerivedType**>(ptr_to_ptr);
+    }
+#else
+ #define CAST(type,value) ((type)value)
+ #define movable_CAST(type, value) ((type)value)
+#endif
+
+// Use for converting e.g. an `array_container_t**` to a `container_t**`
+//
+#define movable_CAST_base(c) movable_CAST(container_t **, c)
+
+
+#ifdef __cplusplus
+} } // namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_CONTAINER_DEFS_H_ */
+/* end file include/roaring/containers/container_defs.h */
+/* begin file include/roaring/array_util.h */
+#ifndef ARRAY_UTIL_H
+#define ARRAY_UTIL_H
+
+#include <stddef.h> // for size_t
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/*
+ * Good old binary search.
+ * Assumes that array is sorted, has logarithmic complexity.
+ * if the result is x, then:
+ * if ( x>=0 ) you have array[x] = ikey (index 0 is a valid hit)
+ * if ( x<0 ) then inserting ikey at position -x-1 in array (ensuring that array[-x-1]=ikey)
+ * keeps the array sorted.
+ *
+ * array: sorted values to search; lenarray: number of entries to consider;
+ * ikey: value looked for. With lenarray == 0 the loop is skipped and -1 is
+ * returned (insertion position 0).
+ *
+ * NOTE(review): declared `inline` without `static`; under C99 inline rules
+ * this presumably relies on an external definition elsewhere -- confirm.
+ */
+inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
+ uint16_t ikey) {
+ int32_t low = 0;
+ int32_t high = lenarray - 1;
+ while (low <= high) { // search window is the closed range [low, high]
+ int32_t middleIndex = (low + high) >> 1;
+ uint16_t middleValue = array[middleIndex];
+ if (middleValue < ikey) {
+ low = middleIndex + 1;
+ } else if (middleValue > ikey) {
+ high = middleIndex - 1;
+ } else {
+ return middleIndex;
+ }
+ }
+ return -(low + 1); // miss: low is the insertion position, encoded as -(low+1)
+}
+
+/**
+ * Galloping search
+ * Assumes that array is sorted, has logarithmic complexity.
+ * if the result is x, then if x = length, you have that all values in array between pos and length
+ * are smaller than min.
+ * otherwise returns the first index x > pos such that array[x] >= min.
+ *
+ * The search starts at pos + 1, so array[pos] itself is never examined.
+ * Phase 1 doubles a span until it overshoots `min` or the end of the array;
+ * phase 2 bisects between the last span that was too small and the
+ * overshooting position.
+ */
+static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
+ int32_t length, uint16_t min) {
+ int32_t lower = pos + 1;
+
+ if ((lower >= length) || (array[lower] >= min)) { // nothing left, or immediate hit
+ return lower;
+ }
+
+ int32_t spansize = 1;
+
+ while ((lower + spansize < length) && (array[lower + spansize] < min)) {
+ spansize <<= 1; // gallop: double the step while still below min
+ }
+ int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; // clamp to last valid index
+
+ if (array[upper] == min) {
+ return upper;
+ }
+ if (array[upper] < min) {
+ // here upper
+ // can only be
+ // length - 1, so
+ // the array has
+ // no item >= min:
+ // report `length`
+ return length;
+ }
+
+ // we know that the next-smallest span was too small
+ lower += (spansize >> 1);
+
+ int32_t mid = 0;
+ while (lower + 1 != upper) { // bisect with array[lower] < min < array[upper]
+ mid = (lower + upper) >> 1;
+ if (array[mid] == min) {
+ return mid;
+ } else if (array[mid] < min) {
+ lower = mid;
+ } else {
+ upper = mid;
+ }
+ }
+ return upper;
+}
+
+/**
+ * Counts the entries of `array` that are strictly smaller than ikey.
+ * The array must hold unique values in sorted order; an empty array gives 0.
+ */
+static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
+                                 uint16_t ikey) {
+    if (lenarray == 0) {
+        return 0;
+    }
+    const int32_t found = binarySearch(array, lenarray, ikey);
+    if (found < 0) {
+        // Miss: the search result encodes the insertion point p as -(p + 1),
+        // and exactly p elements are below ikey.
+        return -(found + 1);
+    }
+    // Hit at index `found`: that many elements precede it.
+    return found;
+}
+
+/**
+ * Counts the entries of `array` that are strictly greater than ikey.
+ * The array must hold unique values in sorted order; an empty array gives 0.
+ */
+static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
+                                    uint16_t ikey) {
+    if (lenarray == 0) {
+        return 0;
+    }
+    const int32_t found = binarySearch(array, lenarray, ikey);
+    // Number of leading entries that are <= ikey: on a hit this includes the
+    // matching entry itself, on a miss it is the decoded insertion point.
+    const int32_t not_greater = (found >= 0) ? (found + 1) : (-found - 1);
+    return lenarray - not_greater;
+}
+
+/**
+ * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
+ * Optimized by D. Lemire on May 3rd 2013
+ *
+ * C should have capacity greater than the minimum of s_a and s_b + 8
+ * where 8 is sizeof(__m128i)/sizeof(uint16_t).
+ */
+int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b,
+ uint16_t *C);
+
+/**
+ * Compute the cardinality of the intersection using SSE4 instructions
+ */
+int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
+ size_t s_a,
+ const uint16_t *__restrict__ B,
+ size_t s_b);
+
+/* Computes the intersection between one small and one large set of uint16_t.
+ * Stores the result into buffer and return the number of elements. */
+int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s,
+ const uint16_t *largearray, size_t size_l,
+ uint16_t *buffer);
+
+/* Computes the size of the intersection between one small and one large set of
+ * uint16_t. */
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
+ size_t size_s,
+ const uint16_t *largearray,
+ size_t size_l);
+
+
+/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */
+bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
+ const uint16_t *largearray, size_t size_l);
+/**
+ * Generic intersection function.
+ */
+int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
+ const uint16_t *B, const size_t lenB, uint16_t *out);
+/**
+ * Compute the size of the intersection (generic).
+ */
+int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
+ const uint16_t *B, const size_t lenB);
+
+/**
+ * Checking whether the size of the intersection is non-zero.
+ */
+bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
+ const uint16_t *B, const size_t lenB);
+/**
+ * Generic union function.
+ */
+size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+ size_t size_2, uint16_t *buffer);
+
+/**
+ * Generic XOR function.
+ */
+int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
+ const uint16_t *array_2, int32_t card_2, uint16_t *out);
+
+/**
+ * Generic difference function (ANDNOT).
+ */
+int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
+ int length2, uint16_t *a_out);
+
+/**
+ * Generic intersection function.
+ */
+size_t intersection_uint32(const uint32_t *A, const size_t lenA,
+ const uint32_t *B, const size_t lenB, uint32_t *out);
+
+/**
+ * Generic intersection function, returns just the cardinality.
+ */
+size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
+ const uint32_t *B, const size_t lenB);
+
+/**
+ * Generic union function.
+ */
+size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
+ size_t size_2, uint32_t *buffer);
+
+/**
+ * A fast SSE-based union function.
+ */
+uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
+ const uint16_t *__restrict__ set_2, uint32_t size_2,
+ uint16_t *__restrict__ buffer);
+/**
+ * A fast SSE-based XOR function.
+ */
+uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+ const uint16_t *__restrict__ array2, uint32_t length2,
+ uint16_t *__restrict__ output);
+
+/**
+ * A fast SSE-based difference function.
+ */
+int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b,
+ uint16_t *C);
+
+/**
+ * Generic union function, returns just the cardinality.
+ */
+size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
+ const uint32_t *set_2, size_t size_2);
+
+/**
+* combines union_uint16 and union_vector16 optimally
+*/
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+ size_t size_2, uint16_t *buffer);
+
+
+bool memequals(const void *s1, const void *s2, size_t n);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif
+/* end file include/roaring/array_util.h */
+/* begin file include/roaring/utilasm.h */
+/*
+ * utilasm.h
+ *
+ */
+
+#ifndef INCLUDE_UTILASM_H_
+#define INCLUDE_UTILASM_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring {
+#endif
+
+#if defined(CROARING_INLINE_ASM)
+#define CROARING_ASMBITMANIPOPTIMIZATION // optimization flag
+
+#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) /* destReg = srcReg >> bitsReg (shrx) */ \
+ __asm volatile("shrx %1, %2, %0" \
+ : "=r"(destReg) \
+ : /* write (output operand above) */ \
+ "r"(bitsReg), /* read only: shift count */ \
+ "r"(srcReg) /* read only: value to shift */ \
+ )
+
+#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) /* srcReg >>= bitsReg (shrx) */ \
+ __asm volatile("shrx %1, %0, %0" \
+ : "+r"(srcReg) \
+ : /* read/write (operand above) */ \
+ "r"(bitsReg) /* read only: shift count */ \
+ )
+
+#define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) /* destReg = srcReg << bitsReg (shlx) */ \
+ __asm volatile("shlx %1, %2, %0" \
+ : "=r"(destReg) \
+ : /* write (output operand above) */ \
+ "r"(bitsReg), /* read only: shift count */ \
+ "r"(srcReg) /* read only: value to shift */ \
+ )
+// set bit at position testBit within testByte to 1 and
+// increment count if that bit was previously clear (bts puts the old bit in CF; sbb $-1 adds 1 - CF)
+#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \
+ __asm volatile( \
+ "bts %2, %0\n" \
+ "sbb $-1, %1\n" \
+ : "+r"(testByte), /* read/write */ \
+ "+r"(count) \
+ : /* read/write */ \
+ "r"(testBit) /* read only */ \
+ )
+
+#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) /* clear bit testBit of testByte; decrement count if it was set (sbb $0 subtracts CF) */ \
+ __asm volatile( \
+ "btr %2, %0\n" \
+ "sbb $0, %1\n" \
+ : "+r"(testByte), /* read/write */ \
+ "+r"(count) \
+ : /* read/write */ \
+ "r"(testBit) /* read only */ \
+ )
+
+#define ASM_BT64(testByte, testBit, count) /* count = all-ones if bit testBit of testByte is set, else 0 */ \
+ __asm volatile( \
+ "bt %2,%1\n" \
+ "sbb %0,%0" /*could use setb; sbb r,r yields -CF */ \
+ : "=r"(count) \
+ : /* write */ \
+ "r"(testByte), /* read only */ \
+ "r"(testBit) /* read only */ \
+ )
+
+#endif
+
+#ifdef __cplusplus
+} } // extern "C" { namespace roaring {
+#endif
+
+#endif /* INCLUDE_UTILASM_H_ */
+/* end file include/roaring/utilasm.h */
+/* begin file include/roaring/bitset_util.h */
+#ifndef BITSET_UTIL_H
+#define BITSET_UTIL_H
+
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/*
+ * Set all bits in indexes [start,end) to true.
+ *
+ * words: bitset stored as 64-bit words, bit k living in words[k / 64].
+ * An empty range (start == end) is a no-op.
+ * NOTE(review): assumes start <= end; the code does not check it.
+ *
+ * (~end + 1) is -end in unsigned arithmetic, so (~end + 1) % 64 is the number
+ * of high bits of the last word that lie at or beyond `end` (0 when end is a
+ * multiple of 64); shifting all-ones right by it keeps only the bits below
+ * `end`.
+ */
+static inline void bitset_set_range(uint64_t *words, uint32_t start,
+ uint32_t end) {
+
+ if (start == end) return;
+ uint32_t firstword = start / 64;
+ uint32_t endword = (end - 1) / 64, i; // word holding the last bit of the range
+ if (firstword == endword) { // range inside one word: combine both masks
+ words[firstword] |= ((~UINT64_C(0)) << (start % 64)) &
+ ((~UINT64_C(0)) >> ((~end + 1) % 64));
+ return;
+ }
+ words[firstword] |= (~UINT64_C(0)) << (start % 64); // bits from start upward
+ for (i = firstword + 1; i < endword; i++) {
+ words[i] = ~UINT64_C(0); // fully covered middle words
+ }
+ words[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); // bits below end
+}
+
+
+/*
+ * Find the cardinality of the bitset in [start,start+lenminusone]
+ *
+ * The range is closed: it covers lenminusone + 1 bit positions.
+ * Returns the sum of hamming() over the masked words; hamming() is defined
+ * elsewhere (presumably a 64-bit population count -- confirm).
+ *
+ * ((~start + 1) - lenminusone - 1) is -(start + lenminusone + 1) in unsigned
+ * arithmetic; modulo 64 it is the number of high bits of the last word that
+ * lie beyond the range.
+ */
+static inline int bitset_lenrange_cardinality(const uint64_t *words,
+ uint32_t start,
+ uint32_t lenminusone) {
+ uint32_t firstword = start / 64;
+ uint32_t endword = (start + lenminusone) / 64, i;
+ if (firstword == endword) { // single word: lenminusone+1 ones shifted to start
+ return hamming(words[firstword] &
+ ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
+ << (start % 64));
+ }
+ int answer = hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
+ for (i = firstword + 1; i < endword; i++) {
+ answer += hamming(words[i]); // fully covered middle words
+ }
+ answer +=
+ hamming(words[endword] &
+ (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64));
+ return answer;
+}
+
+/*
+ * Check whether the cardinality of the bitset in [start,start+lenminusone] is 0
+ *
+ * Returns true when no bit of the closed range is set, false as soon as a
+ * set bit is found. Uses the same first/middle/last word masks as
+ * bitset_lenrange_cardinality.
+ */
+static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start,
+ uint32_t lenminusone) {
+ uint32_t firstword = start / 64;
+ uint32_t endword = (start + lenminusone) / 64, i;
+ if (firstword == endword) { // range inside one word
+ return (words[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
+ << (start % 64)) == 0;
+ }
+ if (((words[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) { // bits from start upward
+ return false;
+ }
+ for ( i = firstword + 1; i < endword; i++) {
+ if (words[i] != 0) {
+ return false;
+ }
+ }
+ if ((words[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) { // bits up to the range end
+ return false;
+ }
+ return true;
+}
+
+
+/*
+ * Set all bits in indexes [start,start+lenminusone] to true.
+ *
+ * The middle-word loop stores two words per iteration, so its last store may
+ * land on words[endword] and overwrite bits outside the range. For that
+ * reason the original last word is saved in `temp` first and the final
+ * assignment rebuilds words[endword] from `temp` plus the range mask.
+ */
+static inline void bitset_set_lenrange(uint64_t *words, uint32_t start,
+ uint32_t lenminusone) {
+ uint32_t firstword = start / 64;
+ uint32_t endword = (start + lenminusone) / 64, i;
+ if (firstword == endword) { // range inside one word
+ words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
+ << (start % 64);
+ return;
+ }
+ uint64_t temp = words[endword]; // saved before the loop can clobber it
+ words[firstword] |= (~UINT64_C(0)) << (start % 64);
+ for ( i = firstword + 1; i < endword; i += 2)
+ words[i] = words[i + 1] = ~UINT64_C(0); // i + 1 <= endword, never beyond
+ words[endword] =
+ temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
+}
+
+/*
+ * Flip all the bits in indexes [start,end).
+ *
+ * An empty range (start == end) is a no-op.
+ * How it works: the bits below `start` in the first word are flipped once up
+ * front; the loop then inverts every word from firstword up to (but not
+ * including) endword, which restores those low bits and flips the rest; the
+ * last statement flips the bits below `end` in the last word. When the range
+ * lies inside a single word the loop does not run and the two XOR masks
+ * cancel below `start`, leaving exactly [start,end) flipped.
+ */
+static inline void bitset_flip_range(uint64_t *words, uint32_t start,
+ uint32_t end) {
+ if (start == end) return;
+ uint32_t firstword = start / 64;
+ uint32_t endword = (end - 1) / 64, i;
+ words[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); // pre-flip bits below start
+ for ( i = firstword; i < endword; i++) {
+ words[i] = ~words[i];
+ }
+ words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); // flip bits below end
+}
+
+/*
+ * Set all bits in indexes [start,end) to false.
+ *
+ * An empty range (start == end) is a no-op. The masks are the same as in
+ * bitset_set_range, applied inverted with AND instead of OR.
+ * NOTE(review): assumes start <= end; the code does not check it.
+ */
+static inline void bitset_reset_range(uint64_t *words, uint32_t start,
+ uint32_t end) {
+ if (start == end) return;
+ uint32_t firstword = start / 64;
+ uint32_t endword = (end - 1) / 64, i; // word holding the last bit of the range
+ if (firstword == endword) { // range inside one word: clear the intersection of both masks
+ words[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) &
+ ((~UINT64_C(0)) >> ((~end + 1) % 64)));
+ return;
+ }
+ words[firstword] &= ~((~UINT64_C(0)) << (start % 64)); // keep only bits below start
+ for ( i = firstword + 1; i < endword; i++) {
+ words[i] = UINT64_C(0); // fully covered middle words
+ }
+ words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); // keep only bits at or above end
+}
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out", values start at "base".
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ * set.
+ *
+ * Returns how many values were actually decoded.
+ *
+ * This function should only be expected to be faster than
+ * bitset_extract_setbits
+ * when the density of the bitset is high.
+ *
+ * This function uses AVX2 decoding.
+ */
+size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
+ uint32_t *out, size_t outcapacity,
+ uint32_t base);
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out", values start at "base".
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ *set.
+ *
+ * Returns how many values were actually decoded.
+ */
+size_t bitset_extract_setbits(const uint64_t *words, size_t length,
+ uint32_t *out, uint32_t base);
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out" as 16-bit integers, values start at "base" (can
+ *be set to zero)
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ *set.
+ *
+ * Returns how many values were actually decoded.
+ *
+ * This function should only be expected to be faster than
+ *bitset_extract_setbits_uint16
+ * when the density of the bitset is high.
+ *
+ * This function uses SSE decoding.
+ */
+size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
+ uint16_t *out, size_t outcapacity,
+ uint16_t base);
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out", values start at "base"
+ * (can be set to zero)
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ *set.
+ *
+ * Returns how many values were actually decoded.
+ */
+size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
+ uint16_t *out, uint16_t base);
+
+/*
+ * Given two bitsets containing "length" 64-bit words, write out the position
+ * of all the common set bits to "out", values start at "base"
+ * (can be set to zero)
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ * set.
+ *
+ * Returns how many values were actually decoded.
+ */
+size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
+ const uint64_t * __restrict__ words2,
+ size_t length, uint16_t *out,
+ uint16_t base);
+
+/*
+ * Given a bitset having cardinality card, set all bit values in the list (there
+ * are length of them)
+ * and return the updated cardinality. This evidently assumes that the bitset
+ * already contained data.
+ */
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length);
+/*
+ * Given a bitset, set all bit values in the list (there
+ * are length of them).
+ */
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length);
+
+/*
+ * Given a bitset having cardinality card, unset all bit values in the list
+ * (there are length of them)
+ * and return the updated cardinality. This evidently assumes that the bitset
+ * already contained data.
+ */
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length);
+
+/*
+ * Given a bitset having cardinality card, toggle all bit values in the list
+ * (there are length of them)
+ * and return the updated cardinality. This evidently assumes that the bitset
+ * already contained data.
+ */
+
+uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
+ const uint16_t *list, uint64_t length);
+
+void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
+
+#ifdef CROARING_IS_X64
+/***
+ * BEGIN Harley-Seal popcount functions.
+ */
+CROARING_TARGET_AVX2
+/**
+ * Compute the population count of a 256-bit word
+ * This is not especially fast, but it is convenient as part of other functions.
+ *
+ * The result is not a single scalar: each of the four 64-bit lanes of the
+ * returned vector holds the number of set bits of the matching 64-bit lane
+ * of v.
+ *
+ * Method: every byte is split into its low and high nibble. lookuppos maps a
+ * nibble to 4 + popcount(nibble), lookupneg maps it to 4 - popcount(nibble)
+ * (each table is repeated for the two 128-bit halves the byte shuffle works
+ * on). The sum of absolute differences over 8 bytes then yields
+ * |(4 + pc(lo)) - (4 - pc(hi))| = pc(lo) + pc(hi) accumulated per 64-bit lane.
+ */
+static inline __m256i popcount256(__m256i v) {
+ const __m256i lookuppos = _mm256_setr_epi8(
+ /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
+ /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
+ /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
+ /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4,
+
+ /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
+ /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
+ /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
+ /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4);
+ const __m256i lookupneg = _mm256_setr_epi8(
+ /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
+ /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
+ /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
+ /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4,
+
+ /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
+ /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
+ /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
+ /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4);
+ const __m256i low_mask = _mm256_set1_epi8(0x0f);
+
+ const __m256i lo = _mm256_and_si256(v, low_mask); // low nibble of every byte
+ const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); // high nibble of every byte
+ const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); // 4 + pc(lo)
+ const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); // 4 - pc(hi)
+ return _mm256_sad_epu8(popcnt1, popcnt2);
+}
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+/**
+ * Simple CSA over 256 bits
+ *
+ * Carry-save adder applied independently to every bit position of a, b and
+ * c: *l receives the sum bit (a ^ b ^ c) and *h the carry bit (set when at
+ * least two of the three inputs are set). Outputs are written through the
+ * pointers; the inputs are taken by value, so an output may alias an input
+ * variable at the call site.
+ */
+static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
+ __m256i c) {
+ const __m256i u = _mm256_xor_si256(a, b);
+ *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); // carry
+ *l = _mm256_xor_si256(u, c); // sum
+}
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+/**
+ * Fast Harley-Seal AVX population count function
+ *
+ * data: array of `size` 256-bit vectors, read with unaligned loads.
+ * Returns the total number of set bits over all `size` vectors.
+ *
+ * Vectors are consumed 16 at a time through a tree of carry-save adders
+ * (CSA): `ones`, `twos`, `fours` and `eights` carry partial sums of weight
+ * 1, 2, 4 and 8 between iterations, and only the weight-16 output is
+ * popcounted inside the loop. After the loop the accumulated total is scaled
+ * by 16 and the leftover weighted partial sums are added. Any remaining
+ * size % 16 vectors are popcounted one by one.
+ */
+inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
+ const uint64_t size) {
+ __m256i total = _mm256_setzero_si256();
+ __m256i ones = _mm256_setzero_si256();
+ __m256i twos = _mm256_setzero_si256();
+ __m256i fours = _mm256_setzero_si256();
+ __m256i eights = _mm256_setzero_si256();
+ __m256i sixteens = _mm256_setzero_si256();
+ __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;
+
+ const uint64_t limit = size - size % 16; // largest multiple of 16 not above size
+ uint64_t i = 0;
+
+ for (; i < limit; i += 16) {
+ CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i),
+ _mm256_lddqu_si256(data + i + 1));
+ CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2),
+ _mm256_lddqu_si256(data + i + 3));
+ CSA(&foursA, &twos, twos, twosA, twosB);
+ CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4),
+ _mm256_lddqu_si256(data + i + 5));
+ CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6),
+ _mm256_lddqu_si256(data + i + 7));
+ CSA(&foursB, &twos, twos, twosA, twosB);
+ CSA(&eightsA, &fours, fours, foursA, foursB);
+ CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8),
+ _mm256_lddqu_si256(data + i + 9));
+ CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10),
+ _mm256_lddqu_si256(data + i + 11));
+ CSA(&foursA, &twos, twos, twosA, twosB);
+ CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12),
+ _mm256_lddqu_si256(data + i + 13));
+ CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14),
+ _mm256_lddqu_si256(data + i + 15));
+ CSA(&foursB, &twos, twos, twosA, twosB);
+ CSA(&eightsB, &fours, fours, foursA, foursB);
+ CSA(&sixteens, &eights, eights, eightsA, eightsB);
+
+ total = _mm256_add_epi64(total, popcount256(sixteens)); // counted in units of 16
+ }
+
+ total = _mm256_slli_epi64(total, 4); // * 16
+ total = _mm256_add_epi64(
+ total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ...
+ total = _mm256_add_epi64(
+ total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ...
+ total = _mm256_add_epi64(
+ total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ...
+ total = _mm256_add_epi64(total, popcount256(ones));
+ for (; i < size; i++) // tail: fewer than 16 vectors left
+ total =
+ _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i)));
+
+ return (uint64_t)(_mm256_extract_epi64(total, 0)) + // horizontal sum of the four lanes
+ (uint64_t)(_mm256_extract_epi64(total, 1)) +
+ (uint64_t)(_mm256_extract_epi64(total, 2)) +
+ (uint64_t)(_mm256_extract_epi64(total, 3));
+}
+CROARING_UNTARGET_REGION
+/**
+ * AVXPOPCNTFNC(opname, avx_intrinsic) expands to two static inline functions:
+ *
+ * avx2_harley_seal_popcount256_<opname>(data1, data2, size)
+ * returns the number of set bits of avx_intrinsic(data1[i], data2[i])
+ * summed over i in [0, size), where each element is a 256-bit vector.
+ *
+ * avx2_harley_seal_popcount256andstore_<opname>(data1, data2, out, size)
+ * does the same and additionally stores every combined vector to out[i].
+ *
+ * Both follow the same scheme as avx2_harley_seal_popcount256: 16 vectors per
+ * round through a carry-save-adder tree, then a one-vector-at-a-time tail for
+ * the remaining size % 16 vectors. All loads and stores are unaligned.
+ * The operands are passed to avx_intrinsic in the order (data1, data2), which
+ * matters for non-commutative intrinsics.
+ */
+#define AVXPOPCNTFNC(opname, avx_intrinsic) \
+ static inline uint64_t avx2_harley_seal_popcount256_##opname( \
+ const __m256i *data1, const __m256i *data2, const uint64_t size) { \
+ __m256i total = _mm256_setzero_si256(); \
+ __m256i ones = _mm256_setzero_si256(); \
+ __m256i twos = _mm256_setzero_si256(); \
+ __m256i fours = _mm256_setzero_si256(); \
+ __m256i eights = _mm256_setzero_si256(); \
+ __m256i sixteens = _mm256_setzero_si256(); \
+ __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \
+ __m256i A1, A2; \
+ const uint64_t limit = size - size % 16; \
+ uint64_t i = 0; \
+ for (; i < limit; i += 16) { \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \
+ _mm256_lddqu_si256(data2 + i)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \
+ _mm256_lddqu_si256(data2 + i + 1)); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \
+ _mm256_lddqu_si256(data2 + i + 2)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \
+ _mm256_lddqu_si256(data2 + i + 3)); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursA, &twos, twos, twosA, twosB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \
+ _mm256_lddqu_si256(data2 + i + 4)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \
+ _mm256_lddqu_si256(data2 + i + 5)); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \
+ _mm256_lddqu_si256(data2 + i + 6)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \
+ _mm256_lddqu_si256(data2 + i + 7)); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursB, &twos, twos, twosA, twosB); \
+ CSA(&eightsA, &fours, fours, foursA, foursB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \
+ _mm256_lddqu_si256(data2 + i + 8)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \
+ _mm256_lddqu_si256(data2 + i + 9)); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \
+ _mm256_lddqu_si256(data2 + i + 10)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \
+ _mm256_lddqu_si256(data2 + i + 11)); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursA, &twos, twos, twosA, twosB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \
+ _mm256_lddqu_si256(data2 + i + 12)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \
+ _mm256_lddqu_si256(data2 + i + 13)); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \
+ _mm256_lddqu_si256(data2 + i + 14)); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \
+ _mm256_lddqu_si256(data2 + i + 15)); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursB, &twos, twos, twosA, twosB); \
+ CSA(&eightsB, &fours, fours, foursA, foursB); \
+ CSA(&sixteens, &eights, eights, eightsA, eightsB); \
+ total = _mm256_add_epi64(total, popcount256(sixteens)); \
+ } \
+ total = _mm256_slli_epi64(total, 4); \
+ total = _mm256_add_epi64(total, \
+ _mm256_slli_epi64(popcount256(eights), 3)); \
+ total = \
+ _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
+ total = \
+ _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \
+ total = _mm256_add_epi64(total, popcount256(ones)); \
+ for (; i < size; i++) { \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \
+ _mm256_lddqu_si256(data2 + i)); \
+ total = _mm256_add_epi64(total, popcount256(A1)); \
+ } \
+ return (uint64_t)(_mm256_extract_epi64(total, 0)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 1)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 2)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 3)); \
+ } \
+ static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \
+ const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \
+ __m256i *__restrict__ out, const uint64_t size) { \
+ __m256i total = _mm256_setzero_si256(); \
+ __m256i ones = _mm256_setzero_si256(); \
+ __m256i twos = _mm256_setzero_si256(); \
+ __m256i fours = _mm256_setzero_si256(); \
+ __m256i eights = _mm256_setzero_si256(); \
+ __m256i sixteens = _mm256_setzero_si256(); \
+ __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \
+ __m256i A1, A2; \
+ const uint64_t limit = size - size % 16; \
+ uint64_t i = 0; \
+ for (; i < limit; i += 16) { \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \
+ _mm256_lddqu_si256(data2 + i)); \
+ _mm256_storeu_si256(out + i, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \
+ _mm256_lddqu_si256(data2 + i + 1)); \
+ _mm256_storeu_si256(out + i + 1, A2); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \
+ _mm256_lddqu_si256(data2 + i + 2)); \
+ _mm256_storeu_si256(out + i + 2, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \
+ _mm256_lddqu_si256(data2 + i + 3)); \
+ _mm256_storeu_si256(out + i + 3, A2); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursA, &twos, twos, twosA, twosB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \
+ _mm256_lddqu_si256(data2 + i + 4)); \
+ _mm256_storeu_si256(out + i + 4, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \
+ _mm256_lddqu_si256(data2 + i + 5)); \
+ _mm256_storeu_si256(out + i + 5, A2); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \
+ _mm256_lddqu_si256(data2 + i + 6)); \
+ _mm256_storeu_si256(out + i + 6, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \
+ _mm256_lddqu_si256(data2 + i + 7)); \
+ _mm256_storeu_si256(out + i + 7, A2); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursB, &twos, twos, twosA, twosB); \
+ CSA(&eightsA, &fours, fours, foursA, foursB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \
+ _mm256_lddqu_si256(data2 + i + 8)); \
+ _mm256_storeu_si256(out + i + 8, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \
+ _mm256_lddqu_si256(data2 + i + 9)); \
+ _mm256_storeu_si256(out + i + 9, A2); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \
+ _mm256_lddqu_si256(data2 + i + 10)); \
+ _mm256_storeu_si256(out + i + 10, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \
+ _mm256_lddqu_si256(data2 + i + 11)); \
+ _mm256_storeu_si256(out + i + 11, A2); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursA, &twos, twos, twosA, twosB); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \
+ _mm256_lddqu_si256(data2 + i + 12)); \
+ _mm256_storeu_si256(out + i + 12, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \
+ _mm256_lddqu_si256(data2 + i + 13)); \
+ _mm256_storeu_si256(out + i + 13, A2); \
+ CSA(&twosA, &ones, ones, A1, A2); \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \
+ _mm256_lddqu_si256(data2 + i + 14)); \
+ _mm256_storeu_si256(out + i + 14, A1); \
+ A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \
+ _mm256_lddqu_si256(data2 + i + 15)); \
+ _mm256_storeu_si256(out + i + 15, A2); \
+ CSA(&twosB, &ones, ones, A1, A2); \
+ CSA(&foursB, &twos, twos, twosA, twosB); \
+ CSA(&eightsB, &fours, fours, foursA, foursB); \
+ CSA(&sixteens, &eights, eights, eightsA, eightsB); \
+ total = _mm256_add_epi64(total, popcount256(sixteens)); \
+ } \
+ total = _mm256_slli_epi64(total, 4); \
+ total = _mm256_add_epi64(total, \
+ _mm256_slli_epi64(popcount256(eights), 3)); \
+ total = \
+ _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
+ total = \
+ _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \
+ total = _mm256_add_epi64(total, popcount256(ones)); \
+ for (; i < size; i++) { \
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \
+ _mm256_lddqu_si256(data2 + i)); \
+ _mm256_storeu_si256(out + i, A1); \
+ total = _mm256_add_epi64(total, popcount256(A1)); \
+ } \
+ return (uint64_t)(_mm256_extract_epi64(total, 0)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 1)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 2)) + \
+ (uint64_t)(_mm256_extract_epi64(total, 3)); \
+ }
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC(or, _mm256_or_si256) // popcount / popcount-and-store of data1 | data2
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC(union, _mm256_or_si256) // same intrinsic as "or", generated under a second name
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC(and, _mm256_and_si256) // popcount / popcount-and-store of data1 & data2
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC(intersection, _mm256_and_si256) // same intrinsic as "and", generated under a second name
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC (xor, _mm256_xor_si256) // popcount / popcount-and-store of data1 ^ data2
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVXPOPCNTFNC(andnot, _mm256_andnot_si256) // _mm256_andnot_si256(a, b) is (~a) & b, i.e. (~data1) & data2 here; NOTE(review): callers presumably swap operands -- confirm
+CROARING_UNTARGET_REGION
+
+/***
+ * END Harley-Seal popcount functions.
+ */
+
+#endif // CROARING_IS_X64
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal
+#endif
+
+#endif
+/* end file include/roaring/bitset_util.h */
+/* begin file include/roaring/containers/array.h */
+/*
+ * array.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_ARRAY_H_
+#define INCLUDE_CONTAINERS_ARRAY_H_
+
+#include <string.h>
+
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring {
+
+// Note: in pure C++ code, you should avoid putting `using` in header files
+using api::roaring_iterator;
+using api::roaring_iterator64;
+
+namespace internal {
+#endif
+
+/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */
+enum { DEFAULT_MAX_SIZE = 4096 };
+
+/* struct array_container - sparse representation of a bitmap
+ *
+ * @cardinality: number of indices in `array` (and the bitmap)
+ * @capacity: allocated size of `array`
+ * @array: sorted list of integers
+ *
+ * Declared through STRUCT_CONTAINER so that, in the C++ build, the struct
+ * derives from container_t; in the C build it is a plain struct.
+ */
+STRUCT_CONTAINER(array_container_s) {
+ int32_t cardinality; // entries of `array` currently in use
+ int32_t capacity; // entries `array` has room for
+ uint16_t *array; // 16-bit values, kept sorted
+};
+
+typedef struct array_container_s array_container_t;
+
+#define CAST_array(c) CAST(array_container_t *, c) // safer downcast
+#define const_CAST_array(c) CAST(const array_container_t *, c)
+#define movable_CAST_array(c) movable_CAST(array_container_t **, c)
+
+/* Create a new array with default. Return NULL in case of failure. See also
+ * array_container_create_given_capacity. */
+array_container_t *array_container_create(void);
+
+/* Create a new array with a specified capacity size. Return NULL in case of
+ * failure. */
+array_container_t *array_container_create_given_capacity(int32_t size);
+
+/* Create a new array containing all values in [min,max). */
+array_container_t * array_container_create_range(uint32_t min, uint32_t max);
+
+/*
+ * Shrink the capacity to the actual size, return the number of bytes saved.
+ */
+int array_container_shrink_to_fit(array_container_t *src);
+
+/* Free memory owned by `array'. */
+void array_container_free(array_container_t *array);
+
+/* Duplicate container */
+array_container_t *array_container_clone(const array_container_t *src);
+
+/* Report how many values `array' currently stores. */
+ALLOW_UNALIGNED
+static inline int array_container_cardinality(const array_container_t *array) {
+    const int32_t stored = array->cardinality;
+    return stored;
+}
+
+/* Tell whether `array' stores at least one value. */
+static inline bool array_container_nonzero_cardinality(
+    const array_container_t *array) {
+    const int32_t stored = array->cardinality;
+    return stored > 0;
+}
+
+/* Copy one container into another. We assume that they are distinct. */
+void array_container_copy(const array_container_t *src, array_container_t *dst);
+
+/* Add the values min, min+step, min+2*step, ... that fall in [min,max)
+ (min itself is included, max is excluded).
+ The container must have a size less or equal to DEFAULT_MAX_SIZE after this
+ addition. */
+void array_container_add_from_range(array_container_t *arr, uint32_t min,
+ uint32_t max, uint16_t step);
+
+
+/* True when `array' stores no value at all. */
+static inline bool array_container_empty(const array_container_t *array) {
+    return 0 == array->cardinality;
+}
+
+/* True when every allocated slot is in use, i.e. cardinality == capacity.
+ * This says nothing about the container holding all 1<<16 possible values. */
+static inline bool array_container_full(const array_container_t *array) {
+    const int32_t used = array->cardinality;
+    const int32_t room = array->capacity;
+    return used == room;
+}
+
+
+/* Compute the union of `src_1' and `src_2' and write the result to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void array_container_union(const array_container_t *src_1,
+ const array_container_t *src_2,
+ array_container_t *dst);
+
+/* symmetric difference, see array_container_union */
+void array_container_xor(const array_container_t *array_1,
+ const array_container_t *array_2,
+ array_container_t *out);
+
+/* Computes the intersection of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+void array_container_intersection(const array_container_t *src_1,
+ const array_container_t *src_2,
+ array_container_t *dst);
+
+/* Check whether src_1 and src_2 intersect. */
+bool array_container_intersect(const array_container_t *src_1,
+ const array_container_t *src_2);
+
+
+/* Computes the size of the intersection between two arrays.
+ */
+int array_container_intersection_cardinality(const array_container_t *src_1,
+ const array_container_t *src_2);
+
+/* computes the intersection of array1 and array2 and write the result to
+ * array1.
+ * */
+void array_container_intersection_inplace(array_container_t *src_1,
+ const array_container_t *src_2);
+
+/*
+ * Write out the 16-bit integers contained in this container as a list of 32-bit
+ * integers using base
+ * as the starting value (it might be expected that base has zeros in its 16
+ * least significant bits).
+ * The function returns the number of values written.
+ * The caller is responsible for allocating enough memory in out.
+ */
+int array_container_to_uint32_array(void *vout, const array_container_t *cont,
+ uint32_t base);
+
+/* Compute the number of runs */
+int32_t array_container_number_of_runs(const array_container_t *ac);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/*
+ * Print this container using printf (useful for debugging).
+ */
+void array_container_printf(const array_container_t *v);
+
+/*
+ * Print this container using printf as a comma-separated list of 32-bit
+ * integers starting at base.
+ */
+void array_container_printf_as_uint32_array(const array_container_t *v,
+ uint32_t base);
+#endif
+
+/**
+ * Serialized size, in bytes, of an array container holding "card" values:
+ * two bytes per value plus two extra bytes.
+ */
+static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
+    const int32_t payload = card * 2;
+    return payload + 2;
+}
+
+/**
+ * Increase capacity to at least min.
+ * Whether the existing data needs to be copied over depends on the "preserve"
+ * parameter. If preserve is false, then the new content will be uninitialized,
+ * otherwise the old content is copied.
+ */
+void array_container_grow(array_container_t *container, int32_t min,
+ bool preserve);
+
+bool array_container_iterate(const array_container_t *cont, uint32_t base,
+ roaring_iterator iterator, void *ptr);
+bool array_container_iterate64(const array_container_t *cont, uint32_t base,
+ roaring_iterator64 iterator, uint64_t high_bits,
+ void *ptr);
+
+/**
+ * Writes the underlying array to buf, outputs how many bytes were written.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes written should be
+ * array_container_size_in_bytes(container).
+ *
+ */
+int32_t array_container_write(const array_container_t *container, char *buf);
+/**
+ * Reads the instance from buf, outputs how many bytes were read.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes read should be array_container_size_in_bytes(container).
+ * You need to provide the (known) cardinality.
+ */
+int32_t array_container_read(int32_t cardinality, array_container_t *container,
+ const char *buf);
+
+/**
+ * Size in bytes of the value payload of an array container: one uint16_t per
+ * stored value (see array_container_write).
+ * This is meant to be compatible with the Java and Go versions of Roaring and
+ * assumes that the cardinality of the container is already known.
+ */
+static inline int32_t array_container_size_in_bytes(
+    const array_container_t *container) {
+    return (int32_t)(container->cardinality * sizeof(uint16_t));
+}
+
+/**
+ * Return true if both array containers store exactly the same values.
+ * Cardinalities are compared first; the value arrays are only compared
+ * (byte-wise, two bytes per value) when the cardinalities match.
+ */
+ALLOW_UNALIGNED
+static inline bool array_container_equals(
+    const array_container_t *container1,
+    const array_container_t *container2) {
+    if (container1->cardinality == container2->cardinality) {
+        return memequals(container1->array, container2->array,
+                         container1->cardinality*2);
+    }
+    return false;
+}
+
+/**
+ * Return true if container1 is a subset of container2.
+ */
+bool array_container_is_subset(const array_container_t *container1,
+ const array_container_t *container2);
+
+/**
+ * Rank-based selection. The first value of this container is considered to
+ * have rank *start_rank.
+ * If the value of rank "rank" lives in this container, it is stored in
+ * *element and the function returns true.
+ * Otherwise *start_rank is advanced by the container's cardinality and the
+ * function returns false.
+ */
+static inline bool array_container_select(const array_container_t *container,
+                                          uint32_t *start_rank, uint32_t rank,
+                                          uint32_t *element) {
+    const int card = array_container_cardinality(container);
+    if (rank < *start_rank + card) {
+        *element = container->array[rank - *start_rank];
+        return true;
+    }
+    *start_rank += card;
+    return false;
+}
+
+/* Computes the difference of array1 and array2 and write the result
+ * to array out.
+ * Array out does not need to be distinct from array_1
+ */
+void array_container_andnot(const array_container_t *array_1,
+ const array_container_t *array_2,
+ array_container_t *out);
+
+/* Append `pos' at the end of the set. The caller guarantees that `pos' is
+ * larger than every value already stored. Grows the storage by one slot when
+ * the container is full. */
+static inline void array_container_append(array_container_t *arr,
+                                          uint16_t pos) {
+    const int32_t old_capacity = arr->capacity;
+
+    if (arr->cardinality == old_capacity) {
+        array_container_grow(arr, old_capacity + 1, true);
+    }
+
+    arr->array[arr->cardinality] = pos;
+    arr->cardinality++;
+}
+
+/**
+ * Insert value into the set unless doing so would push the cardinality above
+ * max_cardinality.
+ * Return code:
+ * 1 -- value was added
+ * 0 -- value was already present
+ * -1 -- value was not added because cardinality would exceed max_cardinality
+ */
+static inline int array_container_try_add(array_container_t *arr, uint16_t value,
+                                          int32_t max_cardinality) {
+    const int32_t count = arr->cardinality;
+    const bool goes_last = (count == 0) || (arr->array[count - 1] < value);
+    const bool has_room = count < max_cardinality;
+
+    // Fast path: the value is larger than everything stored, plain append.
+    if (goes_last && has_room) {
+        array_container_append(arr, value);
+        return 1;
+    }
+
+    const int32_t found = binarySearch(arr->array, count, value);
+    if (found >= 0) {
+        return 0;
+    }
+    if (!has_room) {
+        return -1;
+    }
+
+    if (array_container_full(arr)) {
+        array_container_grow(arr, arr->capacity + 1, true);
+    }
+    // A negative search result encodes the insertion point as -(index + 1).
+    const int32_t slot = -found - 1;
+    // Taken after the grow above, which may have replaced the storage.
+    uint16_t *hole = arr->array + slot;
+    memmove(hole + 1, hole, (count - slot) * sizeof(uint16_t));
+    *hole = value;
+    arr->cardinality++;
+    return 1;
+}
+
+/* Insert `value' with no cardinality limit. Returns true only when the value
+ * was not already present. */
+static inline bool array_container_add(array_container_t *arr, uint16_t value) {
+    const int outcome = array_container_try_add(arr, value, INT32_MAX);
+    return outcome == 1;
+}
+
+/* Remove `pos' from the set. Returns true if `pos' was present, false (and
+ * no modification) otherwise. */
+static inline bool array_container_remove(array_container_t *arr,
+                                          uint16_t pos) {
+    const int32_t at = binarySearch(arr->array, arr->cardinality, pos);
+    if (at < 0) {
+        return false;
+    }
+    // Close the gap by sliding the tail one slot to the left.
+    memmove(arr->array + at, arr->array + at + 1,
+            (arr->cardinality - at - 1) * sizeof(uint16_t));
+    arr->cardinality--;
+    return true;
+}
+
+/* Check whether `pos' is present. Performs a binary search while the
+ * candidate window spans more than 16 slots, then finishes with a linear scan
+ * of the remaining (sorted) window. */
+inline bool array_container_contains(const array_container_t *arr,
+                                     uint16_t pos) {
+    const uint16_t *values = (const uint16_t *) arr->array;
+    int32_t lo = 0;
+    int32_t hi = arr->cardinality - 1;
+
+    while (hi >= lo + 16) {
+        const int32_t mid = (lo + hi) >> 1;
+        const uint16_t probe = values[mid];
+        if (probe == pos) {
+            return true;
+        }
+        if (probe < pos) {
+            lo = mid + 1;
+        } else {
+            hi = mid - 1;
+        }
+    }
+
+    for (; lo <= hi; lo++) {
+        const uint16_t v = values[lo];
+        // Values are sorted: the first one >= pos settles the question.
+        if (v >= pos) {
+            return v == pos;
+        }
+    }
+    return false;
+}
+
+void array_container_offset(const array_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
+
+/* Check whether every value from range_start (included) to range_end
+ * (excluded) is present. An empty range is always considered present. */
+static inline bool array_container_contains_range(const array_container_t *arr,
+                                                  uint32_t range_start, uint32_t range_end) {
+    const int32_t span = range_end - range_start;
+    const uint16_t first_value = range_start;
+    const uint16_t last_value = range_end - 1;
+
+    if (span <= 0) {
+        return true;  // nothing to look for
+    }
+    if (span > arr->cardinality) {
+        return false;  // not enough stored values to cover the range
+    }
+
+    const int32_t at = binarySearch(arr->array, arr->cardinality, first_value);
+    // The array is sorted without duplicates, so the range is fully present
+    // exactly when its first value is found and the slot span-1 positions
+    // further exists and holds the last value of the range.
+    if (at < 0) {
+        return false;
+    }
+    if (arr->cardinality < at + span) {
+        return false;
+    }
+    return arr->array[at + span - 1] == last_value;
+}
+
+/* Returns the smallest stored value, or 0 when the container is empty. */
+inline uint16_t array_container_minimum(const array_container_t *arr) {
+    return (arr->cardinality == 0) ? 0 : arr->array[0];
+}
+
+/* Returns the largest stored value, or 0 when the container is empty. */
+inline uint16_t array_container_maximum(const array_container_t *arr) {
+    const int32_t count = arr->cardinality;
+    return (count == 0) ? 0 : arr->array[count - 1];
+}
+
+/* Returns the number of stored values that are smaller than or equal to x. */
+inline int array_container_rank(const array_container_t *arr, uint16_t x) {
+    const int32_t at = binarySearch(arr->array, arr->cardinality, x);
+    // Found: x itself counts, hence index + 1. Not found: the negative result
+    // encodes the insertion point, which equals the count of smaller values.
+    return (at >= 0) ? at + 1 : -at - 1;
+}
+
+/* Returns the index of the first value equal to or larger than x, or -1 when
+ * every stored value is smaller than x. */
+inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
+    const int32_t hit = binarySearch(arr->array, arr->cardinality, x);
+    if (hit >= 0) {
+        return hit;
+    }
+    // Not found: decode the insertion point and check it is inside the array.
+    const int32_t next = - hit - 1;
+    return (next < arr->cardinality) ? next : -1;
+}
+
+/*
+ * Inserts every value of the closed range [min,max], given two hints:
+ * nvals_less is the number of array values less than $min
+ * nvals_greater is the number of array values greater than $max
+ * Stored values that already lie inside [min,max] are overwritten.
+ */
+static inline void array_container_add_range_nvals(array_container_t *array,
+                                                   uint32_t min, uint32_t max,
+                                                   int32_t nvals_less,
+                                                   int32_t nvals_greater) {
+    const uint32_t span = max - min;
+    const int32_t total = nvals_less + (span + 1) + nvals_greater;
+    uint32_t k;
+
+    if (total > array->capacity) {
+        array_container_grow(array, total, true);
+    }
+    // Move the values greater than max to the end of the final layout.
+    memmove(array->array + (total - nvals_greater),
+            array->array + (array->cardinality - nvals_greater),
+            nvals_greater * sizeof(uint16_t));
+    // Fill the slots between the two untouched groups with min..max.
+    for (k = 0; k <= span; k++) {
+        array->array[nvals_less + k] = min + k;
+    }
+    array->cardinality = total;
+}
+
+/**
+ * Adds all values in range [min,max]. This function is currently unused
+ * and left as a documentation.
+ */
+/*static inline void array_container_add_range(array_container_t *array,
+ uint32_t min, uint32_t max) {
+ int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
+ int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
+ array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
+}*/
+
+/*
+ * Drops the `count' elements array[pos] .. array[pos+count-1] and shifts the
+ * remaining tail down. Does nothing when count is zero.
+ */
+static inline void array_container_remove_range(array_container_t *array,
+                                                uint32_t pos, uint32_t count) {
+    if (count == 0) {
+        return;
+    }
+    memmove(array->array + pos, array->array + (pos+count),
+            (array->cardinality - pos - count) * sizeof(uint16_t));
+    array->cardinality -= count;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */
+/* end file include/roaring/containers/array.h */
+/* begin file include/roaring/containers/bitset.h */
+/*
+ * bitset.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_BITSET_H_
+#define INCLUDE_CONTAINERS_BITSET_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring {
+
+// Note: in pure C++ code, you should avoid putting `using` in header files
+using api::roaring_iterator;
+using api::roaring_iterator64;
+
+namespace internal {
+#endif
+
+
+
+/* BITSET_CONTAINER_SIZE_IN_WORDS: number of 64-bit words needed to hold
+ * 1 << 16 bits (1024).
+ * BITSET_UNKNOWN_CARDINALITY: sentinel stored in the cardinality field when
+ * the number of set bits has not been computed. */
+enum {
+ BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64,
+ BITSET_UNKNOWN_CARDINALITY = -1
+};
+
+/* struct bitset_container - dense representation of a bitmap
+ *
+ * @cardinality: number of set bits, or BITSET_UNKNOWN_CARDINALITY
+ * @words: the bits, 64 per word; bit `pos` lives in words[pos >> 6] at bit
+ *         position (pos & 63)
+ */
+STRUCT_CONTAINER(bitset_container_s) {
+ int32_t cardinality;
+ uint64_t *words;
+};
+
+typedef struct bitset_container_s bitset_container_t;
+
+#define CAST_bitset(c) CAST(bitset_container_t *, c) // safer downcast
+#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
+#define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c)
+
+/* Create a new bitset. Return NULL in case of failure. */
+bitset_container_t *bitset_container_create(void);
+
+/* Free memory. */
+void bitset_container_free(bitset_container_t *bitset);
+
+/* Clear bitset (sets bits to 0). */
+void bitset_container_clear(bitset_container_t *bitset);
+
+/* Set all bits to 1. */
+void bitset_container_set_all(bitset_container_t *bitset);
+
+/* Duplicate bitset */
+bitset_container_t *bitset_container_clone(const bitset_container_t *src);
+
+/* Set the bits in [begin,end). WARNING: as of April 2016, this method is slow
+ * and should not be used in performance-sensitive code. Ever. */
+void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
+ uint32_t end);
+
+#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(__AVX2__)
+/* Set the ith bit.
+ *
+ * Variant compiled when CROARING_ASMBITMANIPOPTIMIZATION and __AVX2__ are both
+ * defined. The word index computation and the bit update are delegated to the
+ * ASM_SHIFT_RIGHT and ASM_SET_BIT_INC_WAS_CLEAR macros, which are defined
+ * outside this section. Judging by the macro name, the cardinality is
+ * presumably incremented only when the bit was previously clear (to be
+ * confirmed against the macro definition). */
+static inline void bitset_container_set(bitset_container_t *bitset,
+ uint16_t pos) {
+ uint64_t shift = 6;
+ uint64_t offset;
+ uint64_t p = pos;
+ ASM_SHIFT_RIGHT(p, shift, offset);
+ uint64_t load = bitset->words[offset];
+ ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
+ bitset->words[offset] = load;
+}
+
+/* Unset the ith bit. Currently unused. Could be used for optimization. */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
+ uint16_t pos) {
+ uint64_t shift = 6;
+ uint64_t offset;
+ uint64_t p = pos;
+ ASM_SHIFT_RIGHT(p, shift, offset);
+ uint64_t load = bitset->words[offset];
+ ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
+ bitset->words[offset] = load;
+}*/
+
+/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
+ * than bitset_container_set.
+ *
+ * Variant built on the ASM_* macros (defined outside this section). The
+ * return value is the difference between the cardinality after and before
+ * the macro ran, converted to bool. */
+static inline bool bitset_container_add(bitset_container_t *bitset,
+ uint16_t pos) {
+ uint64_t shift = 6;
+ uint64_t offset;
+ uint64_t p = pos;
+ ASM_SHIFT_RIGHT(p, shift, offset);
+ uint64_t load = bitset->words[offset];
+ // could be possibly slightly further optimized
+ const int32_t oldcard = bitset->cardinality;
+ ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
+ bitset->words[offset] = load;
+ return bitset->cardinality - oldcard;
+}
+
+/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be
+ * slower than bitset_container_unset.
+ *
+ * Variant built on the ASM_* macros (defined outside this section). The
+ * return value is the difference between the cardinality before and after
+ * the macro ran, converted to bool. */
+static inline bool bitset_container_remove(bitset_container_t *bitset,
+ uint16_t pos) {
+ uint64_t shift = 6;
+ uint64_t offset;
+ uint64_t p = pos;
+ ASM_SHIFT_RIGHT(p, shift, offset);
+ uint64_t load = bitset->words[offset];
+ // could be possibly slightly further optimized
+ const int32_t oldcard = bitset->cardinality;
+ ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
+ bitset->words[offset] = load;
+ return oldcard - bitset->cardinality;
+}
+
+/* Get the value of the ith bit.
+ *
+ * Loads the word holding `pos' (words[pos >> 6]), lets ASM_INPLACESHIFT_RIGHT
+ * (defined outside this section) modify it using p, and returns the lowest
+ * bit of the result. The macro presumably shifts the word right so the
+ * requested bit ends up in position 0 (to be confirmed against the macro
+ * definition). */
+inline bool bitset_container_get(const bitset_container_t *bitset,
+ uint16_t pos) {
+ uint64_t word = bitset->words[pos >> 6];
+ const uint64_t p = pos;
+ ASM_INPLACESHIFT_RIGHT(word, p);
+ return word & 1;
+}
+
+#else
+
+/* Set bit `pos' and keep the cardinality in sync: it is incremented by one
+ * only when the bit was previously clear. */
+static inline void bitset_container_set(bitset_container_t *bitset,
+                                        uint16_t pos) {
+    const int bit = pos & 63;
+    uint64_t *slot = &bitset->words[pos >> 6];
+    const uint64_t before = *slot;
+    const uint64_t after = before | (UINT64_C(1) << bit);
+    // (before ^ after) >> bit is 1 if the bit flipped, 0 otherwise.
+    bitset->cardinality += (uint32_t)((before ^ after) >> bit);
+    *slot = after;
+}
+
+/* Unset the ith bit. Currently unused. */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
+ uint16_t pos) {
+ const uint64_t old_word = bitset->words[pos >> 6];
+ const int index = pos & 63;
+ const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
+ bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
+ bitset->words[pos >> 6] = new_word;
+}*/
+
+/* Add `pos' to `bitset' and report whether it is a new member: returns true
+ * if `pos' was not present before the call. Might be slower than
+ * bitset_container_set. */
+static inline bool bitset_container_add(bitset_container_t *bitset,
+                                        uint16_t pos) {
+    const int bit = pos & 63;
+    uint64_t *slot = &bitset->words[pos >> 6];
+    const uint64_t before = *slot;
+    const uint64_t after = before | (UINT64_C(1) << bit);
+    // 1 when the bit flipped from 0 to 1, 0 when it was already set.
+    const uint64_t flipped = (before ^ after) >> bit;
+    bitset->cardinality += (uint32_t)flipped;
+    *slot = after;
+    return flipped > 0;
+}
+
+/* Remove `pos' from `bitset' and report whether it was a member: returns true
+ * if `pos' was present before the call. Might be slower than
+ * bitset_container_unset. */
+static inline bool bitset_container_remove(bitset_container_t *bitset,
+                                           uint16_t pos) {
+    const int bit = pos & 63;
+    uint64_t *slot = &bitset->words[pos >> 6];
+    const uint64_t before = *slot;
+    const uint64_t after = before & (~(UINT64_C(1) << bit));
+    // 1 when the bit flipped from 1 to 0, 0 when it was already clear.
+    const uint64_t flipped = (before ^ after) >> bit;
+    bitset->cardinality -= (uint32_t)flipped;
+    *slot = after;
+    return flipped > 0;
+}
+
+/* Read bit `pos': true when it is set, false otherwise. */
+inline bool bitset_container_get(const bitset_container_t *bitset,
+                                 uint16_t pos) {
+    const int bit = pos & 63;
+    const uint64_t shifted = bitset->words[pos >> 6] >> bit;
+    return shifted & 1;
+}
+
+#endif
+
+/*
+* Check if all bits are set in a range of positions from pos_start (included) to
+* pos_end (excluded).
+*
+* The first and last words are tested through partial masks, the words in
+* between must be all ones. When pos_end falls in word index
+* BITSET_CONTAINER_SIZE_IN_WORDS (one past the last word) the last-word test
+* is skipped.
+*/
+static inline bool bitset_container_get_range(const bitset_container_t *bitset,
+                                              uint32_t pos_start, uint32_t pos_end) {
+    const uint32_t first_word = pos_start >> 6;
+    const uint32_t last_word = pos_end >> 6;
+    // Bits at or above pos_start inside the first word.
+    const uint64_t head_mask = ~((1ULL << (pos_start & 0x3F)) - 1);
+    // Bits strictly below pos_end inside the last word.
+    const uint64_t tail_mask = (1ULL << (pos_end & 0x3F)) - 1;
+    uint16_t w;
+
+    if (first_word == last_word) {
+        return ((bitset->words[last_word] & head_mask & tail_mask) ==
+                (head_mask & tail_mask));
+    }
+    if ((bitset->words[first_word] & head_mask) != head_mask) {
+        return false;
+    }
+    if ((last_word < BITSET_CONTAINER_SIZE_IN_WORDS) &&
+        ((bitset->words[last_word] & tail_mask) != tail_mask)) {
+        return false;
+    }
+
+    for (w = first_word + 1;
+         (w < BITSET_CONTAINER_SIZE_IN_WORDS) && (w < last_word); ++w) {
+        if (bitset->words[w] != UINT64_C(0xFFFFFFFFFFFFFFFF)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Check whether `pos' is present in `bitset'. Calls bitset_container_get. */
+inline bool bitset_container_contains(const bitset_container_t *bitset,
+                                      uint16_t pos) {
+    const bool present = bitset_container_get(bitset, pos);
+    return present;
+}
+
+/*
+* Check whether every position from `pos_start' (included) to `pos_end'
+* (excluded) is present in `bitset'. Calls bitset_container_get_range.
+*/
+static inline bool bitset_container_contains_range(const bitset_container_t *bitset,
+                                                   uint32_t pos_start, uint32_t pos_end) {
+    const bool all_set = bitset_container_get_range(bitset, pos_start, pos_end);
+    return all_set;
+}
+
+/* Return the cardinality field as stored in the container (no recount). */
+ALLOW_UNALIGNED
+static inline int bitset_container_cardinality(
+    const bitset_container_t *bitset) {
+    const int32_t stored = bitset->cardinality;
+    return stored;
+}
+
+
+
+
+/* Copy one container into another. We assume that they are distinct. */
+void bitset_container_copy(const bitset_container_t *source,
+ bitset_container_t *dest);
+
+/* Add all the values [min,max) at a distance k*step from min: min,
+ * min+step,.... */
+void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
+ uint32_t max, uint16_t step);
+
+/* Get the number of bits set (force computation). This does not modify bitset.
+ * To update the cardinality, you should do
+ * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/
+int bitset_container_compute_cardinality(const bitset_container_t *bitset);
+
+/* Check whether this bitset has no bit set. When the cardinality is known it
+ * is simply compared to zero; when it is BITSET_UNKNOWN_CARDINALITY the words
+ * are scanned instead. The bitset struct is never modified. */
+static inline bool bitset_container_empty(
+    const bitset_container_t *bitset) {
+    int w = 0;
+    if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+        return bitset->cardinality == 0;
+    }
+    while (w < BITSET_CONTAINER_SIZE_IN_WORDS) {
+        if ((bitset->words[w]) != 0) {
+            return false;
+        }
+        w++;
+    }
+    return true;
+}
+
+
+/* Return true when at least one bit is set (the negation of
+ * bitset_container_empty). The bitset is never modified. */
+static inline bool bitset_container_const_nonzero_cardinality(
+    const bitset_container_t *bitset) {
+    if (bitset_container_empty(bitset)) {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Check whether the two bitsets intersect
+ */
+bool bitset_container_intersect(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
+ * cardinality. */
+int bitset_container_or(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality.
+ */
+int bitset_container_or_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
+ * cardinality. Same as bitset_container_or. */
+int bitset_container_union(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the union of bitsets `src_1' and `src_2' and return the
+ * cardinality. Same as bitset_container_or_justcard. */
+int bitset_container_union_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
+ * update the cardinality. Provided to optimize chained operations. */
+int bitset_container_or_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
+ * return the cardinality. */
+int bitset_container_and(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' and return the
+ * cardinality. */
+int bitset_container_and_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
+ * return the cardinality. Same as bitset_container_and. */
+int bitset_container_intersection(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' and return the
+ * cardinality. Same as bitset_container_and_justcard. */
+int bitset_container_intersection_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_and_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and
+ * return the cardinality. */
+int bitset_container_xor(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the
+ * cardinality. */
+int bitset_container_xor_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_xor_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the
+ * cardinality. */
+int bitset_container_andnot(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Computes the and not of bitsets `src_1' and `src_2' and return the
+ * cardinality. */
+int bitset_container_andnot_justcard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Computes the and not of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_andnot_nocard(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+void bitset_container_offset(const bitset_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
+/*
+ * Write out the 16-bit integers contained in this container as a list of 32-bit
+ * integers using base
+ * as the starting value (it might be expected that base has zeros in its 16
+ * least significant bits).
+ * The function returns the number of values written.
+ * The caller is responsible for allocating enough memory in out.
+ * The out pointer should point to enough memory (the cardinality times 32
+ * bits).
+ */
+int bitset_container_to_uint32_array(uint32_t *out,
+ const bitset_container_t *bc,
+ uint32_t base);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/*
+ * Print this container using printf (useful for debugging).
+ */
+void bitset_container_printf(const bitset_container_t *v);
+
+/*
+ * Print this container using printf as a comma-separated list of 32-bit
+ * integers starting at base.
+ */
+void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
+ uint32_t base);
+#endif
+
+/**
+ * Serialized size in bytes of a bitset container: a fixed
+ * BITSET_CONTAINER_SIZE_IN_WORDS words of 8 bytes each.
+ */
+static inline int32_t bitset_container_serialized_size_in_bytes(void) {
+    const int32_t bytes_per_word = 8;
+    return BITSET_CONTAINER_SIZE_IN_WORDS * bytes_per_word;
+}
+
+/**
+ * Return the number of runs.
+ */
+int bitset_container_number_of_runs(bitset_container_t *bc);
+
+bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
+ roaring_iterator iterator, void *ptr);
+bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
+ roaring_iterator64 iterator, uint64_t high_bits,
+ void *ptr);
+
+/**
+ * Writes the underlying array to buf, outputs how many bytes were written.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes written should be
+ * bitset_container_size_in_bytes(container).
+ */
+int32_t bitset_container_write(const bitset_container_t *container, char *buf);
+
+/**
+ * Reads the instance from buf, outputs how many bytes were read.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes read should be bitset_container_size_in_bytes(container).
+ * You need to provide the (known) cardinality.
+ */
+int32_t bitset_container_read(int32_t cardinality,
+ bitset_container_t *container, const char *buf);
+/**
+ * Size in bytes of the word array of a bitset container (see
+ * bitset_container_write). The value is the same for every bitset container,
+ * so the argument is ignored.
+ * This is meant to be compatible with the Java and Go versions of Roaring.
+ */
+static inline int32_t bitset_container_size_in_bytes(
+    const bitset_container_t *container) {
+    (void)container;  // unused: the size does not depend on the content
+    return (int32_t)(BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+}
+
+/**
+ * Return true if the two containers have the same content.
+ */
+bool bitset_container_equals(const bitset_container_t *container1,
+ const bitset_container_t *container2);
+
+/**
+* Return true if container1 is a subset of container2.
+*/
+bool bitset_container_is_subset(const bitset_container_t *container1,
+ const bitset_container_t *container2);
+
+/**
+ * If the element of given rank is in this container, supposing that the first
+ * element has rank start_rank, then the function returns true and sets element
+ * accordingly.
+ * Otherwise, it returns false and update start_rank.
+ */
+bool bitset_container_select(const bitset_container_t *container,
+ uint32_t *start_rank, uint32_t rank,
+ uint32_t *element);
+
+/* Returns the smallest value (assumes not empty) */
+uint16_t bitset_container_minimum(const bitset_container_t *container);
+
+/* Returns the largest value (assumes not empty) */
+uint16_t bitset_container_maximum(const bitset_container_t *container);
+
+/* Returns the number of values equal or smaller than x */
+int bitset_container_rank(const bitset_container_t *container, uint16_t x);
+
+/* Returns the index of the first value equal or larger than x, or -1 */
+int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_BITSET_H_ */
+/* end file include/roaring/containers/bitset.h */
+/* begin file include/roaring/containers/run.h */
+/*
+ * run.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_RUN_H_
+#define INCLUDE_CONTAINERS_RUN_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring {
+
+// Note: in pure C++ code, you should avoid putting `using` in header files
+using api::roaring_iterator;
+using api::roaring_iterator64;
+
+namespace internal {
+#endif
+
+/* struct rle16_s - run length pair
+ *
+ * @value: start position of the run
+ * @length: length of the run is `length + 1`
+ *
+ * An RLE pair {v, l} represents the integers of the closed interval
+ * [v, v+l], e.g. {3, 2} = [3, 4, 5].
+ */
+struct rle16_s {
+ uint16_t value;
+ uint16_t length;
+};
+
+typedef struct rle16_s rle16_t;
+
+/* MAKE_RLE16(val,len): build an rle16_t from a start value and a length, both
+ * cast to uint16_t. C++ gets a plain brace initializer, C gets a compound
+ * literal with designated initializers. */
+#ifdef __cplusplus
+ #define MAKE_RLE16(val,len) \
+ {(uint16_t)(val), (uint16_t)(len)} // no tagged structs until c++20
+#else
+ #define MAKE_RLE16(val,len) \
+ (rle16_t){.value = (uint16_t)(val), .length = (uint16_t)(len)}
+#endif
+
+/* struct run_container_s - run container bitmap
+ *
+ * @n_runs: number of rle16_t pairs in `runs`.
+ * @capacity: capacity in rle16_t pairs `runs` can hold.
+ * @runs: pairs of rle16_t.
+ */
+STRUCT_CONTAINER(run_container_s) {
+ int32_t n_runs;
+ int32_t capacity;
+ rle16_t *runs;
+};
+
+typedef struct run_container_s run_container_t;
+
+#define CAST_run(c) CAST(run_container_t *, c) // safer downcast
+#define const_CAST_run(c) CAST(const run_container_t *, c)
+#define movable_CAST_run(c) movable_CAST(run_container_t **, c)
+
+/* Create a new run container. Return NULL in case of failure. */
+run_container_t *run_container_create(void);
+
+/* Create a new run container with given capacity. Return NULL in case of
+ * failure. */
+run_container_t *run_container_create_given_capacity(int32_t size);
+
+/*
+ * Shrink the capacity to the actual size, return the number of bytes saved.
+ */
+int run_container_shrink_to_fit(run_container_t *src);
+
+/* Free memory owned by `run'. */
+void run_container_free(run_container_t *run);
+
+/* Duplicate container */
+run_container_t *run_container_clone(const run_container_t *src);
+
+/*
+ * Drops the run stored at position `index` by sliding every later run one
+ * slot towards the front, then decrements the run count.
+ */
+static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) {
+    rle16_t *const hole = run->runs + index;
+    const size_t tail_bytes = (run->n_runs - index - 1) * sizeof(rle16_t);
+    memmove(hole, hole + 1, tail_bytes);
+    run->n_runs -= 1;
+}
+
+/**
+ * Binary search over the start values (`value` fields) of an array of runs.
+ *
+ * Returns the index of the run whose start value equals ikey. When no run
+ * starts at ikey, returns -(p + 1) where p is the position at which such a
+ * run would have to be inserted.
+ */
+inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray,
+                                       uint16_t ikey) {
+    int32_t lo = 0;
+    int32_t hi = lenarray - 1;
+    while (lo <= hi) {
+        const int32_t mid = (lo + hi) >> 1;
+        const uint16_t probe = array[mid].value;
+        if (probe == ikey) {
+            return mid;
+        }
+        if (probe < ikey) {
+            lo = mid + 1;
+        } else {
+            hi = mid - 1;
+        }
+    }
+    return -(lo + 1);
+}
+
+/*
+ * Binary search for the run that contains ikey.
+ *
+ * Returns the index of the run whose [value, value + length] interval covers
+ * ikey, or -(p + 1) when no run covers it, p being the position where a run
+ * holding ikey would be inserted.
+ */
+static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray,
+                                     uint16_t ikey) {
+    int32_t lo = 0;
+    int32_t hi = lenarray - 1;
+    while (lo <= hi) {
+        const int32_t mid = (lo + hi) >> 1;
+        const uint16_t run_first = array[mid].value;
+        const uint16_t run_last = array[mid].value + array[mid].length;
+        if (run_last < ikey) {
+            // the whole run lies below the key
+            lo = mid + 1;
+            continue;
+        }
+        if (ikey < run_first) {
+            // the whole run lies above the key
+            hi = mid - 1;
+            continue;
+        }
+        return mid;
+    }
+    return -(lo + 1);
+}
+
+
+/**
+ * Returns the number of runs that cannot be merged with the key because they
+ * end more than one position below it.
+ * A run that ends exactly at key - 1 is adjacent to the key and is not
+ * counted: [5,6,7,8] can be merged with the key 9.
+ */
+static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray,
+                                       uint16_t key) {
+    if (lenarray == 0) {
+        return 0;
+    }
+    int32_t lo = 0;
+    int32_t hi = lenarray - 1;
+    while (lo <= hi) {
+        const int32_t mid = (lo + hi) >> 1;
+        const uint16_t run_first = array[mid].value;
+        const uint16_t run_last = array[mid].value + array[mid].length;
+        if (run_last + UINT32_C(1) < key) {  // uint32 arithmetic
+            lo = mid + 1;
+            continue;
+        }
+        if (key < run_first) {
+            hi = mid - 1;
+            continue;
+        }
+        // this run contains the key or touches it from below
+        return mid;
+    }
+    return lo;
+}
+
+/**
+ * Returns the number of runs that start more than one position above the key.
+ * A run that starts exactly at key + 1 is adjacent to the key and is not
+ * counted.
+ */
+static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray,
+                                          uint16_t key) {
+    if (lenarray == 0) {
+        return 0;
+    }
+    int32_t lo = 0;
+    int32_t hi = lenarray - 1;
+    while (lo <= hi) {
+        const int32_t mid = (lo + hi) >> 1;
+        const uint16_t run_first = array[mid].value;
+        const uint16_t run_last = array[mid].value + array[mid].length;
+        if (run_last < key) {
+            lo = mid + 1;
+            continue;
+        }
+        if (key + UINT32_C(1) < run_first) {  // uint32 arithmetic
+            hi = mid - 1;
+            continue;
+        }
+        // this run contains the key or touches it from above
+        return lenarray - (mid + 1);
+    }
+    return lenarray - lo;
+}
+
+/**
+ * increase capacity to at least min. Whether the
+ * existing data needs to be copied over depends on copy. If "copy" is false,
+ * then the new content will be uninitialized, otherwise a copy is made.
+ */
+void run_container_grow(run_container_t *run, int32_t min, bool copy);
+
+/**
+ * Opens a free slot at position `index` by moving that run and every later
+ * run one slot towards the back; the run count is incremented.
+ */
+static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) {
+    /* Growing and shifting are two separate steps, so the array may
+     * end up being copied twice.
+     */
+    const int32_t wanted = run->n_runs + 1;
+    if (wanted > run->capacity) {
+        run_container_grow(run, wanted, true);
+    }
+    // take the slot address only after the possible reallocation above
+    rle16_t *const slot = run->runs + index;
+    memmove(slot + 1, slot, (run->n_runs - index) * sizeof(rle16_t));
+    run->n_runs++;
+}
+
+/* Add `pos' to `run'. Returns true if `pos' was not present. */
+bool run_container_add(run_container_t *run, uint16_t pos);
+
+/* Remove `pos' from `run'. Returns true if `pos' was present.
+ *
+ * Three situations are handled: `pos' is the first value of a run, `pos' lies
+ * strictly inside a run (the run is split in two), or `pos' is the last value
+ * of a run. In any other case the container is left untouched and false is
+ * returned.
+ */
+static inline bool run_container_remove(run_container_t *run, uint16_t pos) {
+ // look for a run whose start value is exactly pos
+ int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
+ if (index >= 0) {
+ int32_t le = run->runs[index].length;
+ if (le == 0) {
+ // the run held only pos: drop the whole run
+ recoverRoomAtIndex(run, (uint16_t)index);
+ } else {
+ // pos is the first value of a longer run: advance its start by one
+ run->runs[index].value++;
+ run->runs[index].length--;
+ }
+ return true;
+ }
+ index = -index - 2; // points to preceding value, possibly -1
+ if (index >= 0) { // possible match
+ // offset of pos from the start of the preceding run
+ int32_t offset = pos - run->runs[index].value;
+ int32_t le = run->runs[index].length;
+ if (offset < le) {
+ // need to break in two
+ // left part keeps the values from the run start up to pos - 1
+ run->runs[index].length = (uint16_t)(offset - 1);
+ // need to insert
+ // right part starts at pos + 1 and ends where the old run ended
+ uint16_t newvalue = pos + 1;
+ int32_t newlength = le - offset - 1;
+ makeRoomAtIndex(run, (uint16_t)(index + 1));
+ run->runs[index + 1].value = newvalue;
+ run->runs[index + 1].length = (uint16_t)newlength;
+ return true;
+
+ } else if (offset == le) {
+ // pos is the last value of the run: shorten it by one
+ run->runs[index].length--;
+ return true;
+ }
+ }
+ // no match
+ return false;
+}
+
+/* Tell whether the value `pos' belongs to `run'. */
+inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
+    const int32_t found = interleavedBinarySearch(run->runs, run->n_runs, pos);
+    if (found >= 0) {
+        // a run starts exactly at pos
+        return true;
+    }
+    const int32_t prev = -found - 2;  // run preceding pos, -1 when there is none
+    if (prev == -1) {
+        return false;
+    }
+    // pos is present when it falls within the span of the preceding run
+    const int32_t offset = pos - run->runs[prev].value;
+    const int32_t le = run->runs[prev].length;
+    return offset <= le;
+}
+
+/*
+* Check whether all positions in the range from pos_start (included)
+* to pos_end (excluded) are present in `run'.
+*
+* The run covering pos_start is located first; if there is none the function
+* returns false immediately. Otherwise the runs from that one onwards are
+* scanned, a covered-position count is accumulated, and the count is compared
+* with the size of the requested range.
+*
+* NOTE(review): when pos_start == pos_end the expression
+* (pos_end - pos_start - 1) wraps around in uint32_t arithmetic - confirm that
+* callers never pass an empty range.
+*/
+static inline bool run_container_contains_range(const run_container_t *run,
+ uint32_t pos_start, uint32_t pos_end) {
+ uint32_t count = 0;
+ // pos_start is implicitly narrowed to the uint16_t key of the search
+ int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start), i;
+ if (index < 0) {
+ // no run starts at pos_start: fall back to the preceding run, if any
+ index = -index - 2;
+ if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){
+ // pos_start is before the first run or past the end of the preceding run
+ return false;
+ }
+ }
+ for (i = index; i < run->n_runs; ++i) {
+ // last value held by run i
+ const uint32_t stop = run->runs[i].value + run->runs[i].length;
+ // runs starting at or after pos_end cannot contribute
+ if (run->runs[i].value >= pos_end) break;
+ if (stop >= pos_end) {
+ // run i reaches the end of the range: count from its start up to pos_end
+ // (operands are unsigned, so `> 0` only filters out an exact zero)
+ count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0);
+ break;
+ }
+ // run i ends before pos_end: add at most its length field
+ // (unsigned operands again, `> 0` only filters out an exact zero)
+ const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0;
+ count += (min < run->runs[i].length) ? min : run->runs[i].length;
+ }
+ return count >= (pos_end - pos_start - 1);
+}
+
+/* Get the cardinality of `run'. Requires an actual computation. */
+int run_container_cardinality(const run_container_t *run);
+
+/* True when the container holds at least one value; the opposite of
+ * run_container_empty. */
+static inline bool run_container_nonzero_cardinality(
+    const run_container_t *run) {
+    // a stored run is never empty, so counting runs is enough
+    const int32_t stored_runs = run->n_runs;
+    return stored_runs > 0;
+}
+
+/* True when the container holds no value at all; the opposite of
+ * run_container_nonzero_cardinality. */
+static inline bool run_container_empty(
+    const run_container_t *run) {
+    // a stored run is never empty, so counting runs is enough
+    const int32_t stored_runs = run->n_runs;
+    return stored_runs == 0;
+}
+
+
+
+/* Copy one container into another. We assume that they are distinct. */
+void run_container_copy(const run_container_t *src, run_container_t *dst);
+
+/**
+ * Appends the run vl at the end of the container, merging it with the last
+ * run (tracked through previousrl) when the two overlap or touch.
+ *
+ * No check is made that vl really belongs at the end, and the container must
+ * already have room for one more run: the caller is responsible for both.
+ *
+ * This is not a safe function, it is meant for performance: use with care.
+ */
+static inline void run_container_append(run_container_t *run, rle16_t vl,
+                                        rle16_t *previousrl) {
+    const uint32_t prev_last = previousrl->value + previousrl->length;
+    if (vl.value > prev_last + 1) {
+        // gap between the two runs: store vl as a new run
+        run->runs[run->n_runs++] = vl;
+        *previousrl = vl;
+        return;
+    }
+    const uint32_t new_end = vl.value + vl.length + UINT32_C(1);
+    if (new_end <= prev_last) {
+        // vl is already covered by the previous run
+        return;
+    }
+    // stretch the previous run so that it ends where vl ends
+    previousrl->length = (uint16_t)(new_end - 1 - previousrl->value);
+    run->runs[run->n_runs - 1] = *previousrl;
+}
+
+/**
+ * Variant of run_container_append for a container assumed to be empty: the
+ * run is stored without any merge attempt and handed back to the caller.
+ */
+static inline rle16_t run_container_append_first(run_container_t *run,
+                                                 rle16_t vl) {
+    run->runs[run->n_runs++] = vl;
+    return vl;
+}
+
+/**
+ * Appends the single value val at the end of the container, extending the
+ * last run (tracked through previousrl) when val directly follows it.
+ *
+ * No check is made that val really belongs at the end, and the container must
+ * already have room for one more run: the caller is responsible for both.
+ *
+ * This is not a safe function, it is meant for performance: use with care.
+ */
+static inline void run_container_append_value(run_container_t *run,
+                                              uint16_t val,
+                                              rle16_t *previousrl) {
+    const uint32_t prev_last = previousrl->value + previousrl->length;
+    const uint32_t successor = prev_last + 1;
+    if (val > successor) {
+        // gap after the previous run: start a new single-value run
+        *previousrl = MAKE_RLE16(val, 0);
+        run->runs[run->n_runs] = *previousrl;
+        run->n_runs++;
+        return;
+    }
+    if (val != successor) {
+        // val is not past the end of the previous run: nothing to do
+        return;
+    }
+    // val directly follows the previous run: extend it by one
+    previousrl->length++;
+    run->runs[run->n_runs - 1] = *previousrl;
+}
+
+/**
+ * Variant of run_container_append_value for a container assumed to be empty:
+ * a single-value run holding val is stored and handed back to the caller.
+ */
+static inline rle16_t run_container_append_value_first(run_container_t *run,
+                                                       uint16_t val) {
+    rle16_t newrle = MAKE_RLE16(val, 0);
+    run->runs[run->n_runs++] = newrle;
+    return newrle;
+}
+
+/* Check whether the container spans the whole chunk (cardinality = 1<<16).
+ * This check can be done in constant time (inexpensive).
+ *
+ * Returns true only when the container holds exactly one run that starts at 0
+ * and has a length field of 0xFFFF. */
+static inline bool run_container_is_full(const run_container_t *run) {
+    // test the run count first: runs[0] must not be read when no run is
+    // stored (the array may be unallocated or uninitialized in that case)
+    if (run->n_runs != 1) {
+        return false;
+    }
+    const rle16_t vl = run->runs[0];
+    return (vl.value == 0) && (vl.length == 0xFFFF);
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void run_container_union(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst);
+
+/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */
+void run_container_union_inplace(run_container_t *src_1,
+ const run_container_t *src_2);
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+void run_container_intersection(const run_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst);
+
+/* Compute the size of the intersection of src_1 and src_2 . */
+int run_container_intersection_cardinality(const run_container_t *src_1,
+ const run_container_t *src_2);
+
+/* Check whether src_1 and src_2 intersect. */
+bool run_container_intersect(const run_container_t *src_1,
+ const run_container_t *src_2);
+
+/* Compute the symmetric difference of `src_1' and `src_2' and write the result
+ * to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void run_container_xor(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst);
+
+/*
+ * Write out the 16-bit integers contained in this container as a list of 32-bit
+ * integers using base
+ * as the starting value (it might be expected that base has zeros in its 16
+ * least significant bits).
+ * The function returns the number of values written.
+ * The caller is responsible for allocating enough memory in out.
+ */
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+ uint32_t base);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/*
+ * Print this container using printf (useful for debugging).
+ */
+void run_container_printf(const run_container_t *v);
+
+/*
+ * Print this container using printf as a comma-separated list of 32-bit
+ * integers starting at base.
+ */
+void run_container_printf_as_uint32_array(const run_container_t *v,
+ uint32_t base);
+#endif
+
+/**
+ * Size in bytes of the serialized form of a container made of "num_runs"
+ * runs: one uint16_t followed by one rle16_t per run.
+ */
+static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
+    const size_t header_bytes = sizeof(uint16_t);
+    // each run requires 2 2-byte entries.
+    const size_t runs_bytes = sizeof(rle16_t) * num_runs;
+    return (int32_t)(header_bytes + runs_bytes);
+}
+
+bool run_container_iterate(const run_container_t *cont, uint32_t base,
+ roaring_iterator iterator, void *ptr);
+bool run_container_iterate64(const run_container_t *cont, uint32_t base,
+ roaring_iterator64 iterator, uint64_t high_bits,
+ void *ptr);
+
+/**
+ * Writes the underlying array to buf, outputs how many bytes were written.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes written should be run_container_size_in_bytes(container).
+ */
+int32_t run_container_write(const run_container_t *container, char *buf);
+
+/**
+ * Reads the instance from buf, outputs how many bytes were read.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes read should be run_container_size_in_bytes(container).
+ * The cardinality parameter is provided for consistency with other containers,
+ * but it might be effectively ignored.
+ */
+int32_t run_container_read(int32_t cardinality, run_container_t *container,
+ const char *buf);
+
+/**
+ * Serialized size in bytes of the given container (see run_container_write).
+ * This is meant to be compatible with the Java and Go versions of Roaring.
+ */
+static inline int32_t run_container_size_in_bytes(
+    const run_container_t *container) {
+    const int32_t stored_runs = container->n_runs;
+    return run_container_serialized_size_in_bytes(stored_runs);
+}
+
+/**
+ * Two run containers are equal when they store the same number of runs and
+ * their run arrays are identical.
+ */
+ALLOW_UNALIGNED
+static inline bool run_container_equals(const run_container_t *container1,
+                                        const run_container_t *container2) {
+    const int32_t stored_runs = container1->n_runs;
+    if (stored_runs == container2->n_runs) {
+        return memequals(container1->runs, container2->runs,
+                         stored_runs * sizeof(rle16_t));
+    }
+    return false;
+}
+
+/**
+* Return true if container1 is a subset of container2.
+*/
+bool run_container_is_subset(const run_container_t *container1,
+ const run_container_t *container2);
+
+/**
+ * Used in a start-finish scan that appends segments, for XOR and NOT
+ */
+
+void run_container_smart_append_exclusive(run_container_t *src,
+ const uint16_t start,
+ const uint16_t length);
+
+/**
+* Creates a container made of the single run [start,stop).
+* It is required that stop > start; the caller is responsible for this check.
+* It is required that stop <= (1<<16); the caller is responsible for this
+* check.
+* The cardinality of the created container is stop - start.
+* Returns NULL on failure.
+*/
+static inline run_container_t *run_container_create_range(uint32_t start,
+                                                          uint32_t stop) {
+    run_container_t *rc = run_container_create_given_capacity(1);
+    if (!rc) {
+        return rc;
+    }
+    // the length field stores the number of values minus one
+    rle16_t whole = MAKE_RLE16(start, stop - start - 1);
+    run_container_append_first(rc, whole);
+    return rc;
+}
+
+/**
+ * If the element of given rank is in this container, supposing that the first
+ * element has rank start_rank, then the function returns true and sets element
+ * accordingly.
+ * Otherwise, it returns false and update start_rank.
+ */
+bool run_container_select(const run_container_t *container,
+ uint32_t *start_rank, uint32_t rank,
+ uint32_t *element);
+
+/* Compute the difference of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+
+void run_container_andnot(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst);
+
+void run_container_offset(const run_container_t *c,
+ container_t **loc, container_t **hic,
+ uint16_t offset);
+
+/* Smallest value stored in the container; 0 is returned when the container
+ * holds no run. */
+inline uint16_t run_container_minimum(const run_container_t *run) {
+    return (run->n_runs == 0) ? 0 : run->runs[0].value;
+}
+
+/* Largest value stored in the container; 0 is returned when the container
+ * holds no run. */
+inline uint16_t run_container_maximum(const run_container_t *run) {
+    if (run->n_runs == 0) {
+        return 0;
+    }
+    const rle16_t *tail = &run->runs[run->n_runs - 1];
+    return tail->value + tail->length;
+}
+
+/* Returns the number of values equal or smaller than x */
+int run_container_rank(const run_container_t *arr, uint16_t x);
+
+/* Index of the first run that holds a value greater than or equal to x,
+ * or -1 when every stored value is smaller than x. */
+inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
+    const int32_t hit = interleavedBinarySearch(arr->runs, arr->n_runs, x);
+    if (hit >= 0) {
+        // a run starts exactly at x
+        return hit;
+    }
+    const int32_t prev = -hit - 2;  // run preceding x, -1 when there is none
+    if (prev != -1) {
+        const int32_t offset = x - arr->runs[prev].value;
+        const int32_t le = arr->runs[prev].length;
+        if (offset <= le) {
+            // x falls inside the preceding run
+            return prev;
+        }
+    }
+    // otherwise the answer is the run right after, provided it exists
+    const int32_t next = prev + 1;
+    return (next < arr->n_runs) ? next : -1;
+}
+
+/*
+ * Add all values in range [min, max], given as hints the number of runs
+ * lying before the range (nruns_less) and after it (nruns_greater). The runs
+ * in between are replaced by one run covering both them and the range.
+ */
+static inline void run_container_add_range_nruns(run_container_t* run,
+                                                 uint32_t min, uint32_t max,
+                                                 int32_t nruns_less,
+                                                 int32_t nruns_greater) {
+    const int32_t nruns_common = run->n_runs - nruns_less - nruns_greater;
+    if (nruns_common == 0) {
+        // nothing to merge with: insert the range as a brand new run
+        makeRoomAtIndex(run, nruns_less);
+        run->runs[nruns_less].value = min;
+        run->runs[nruns_less].length = max - min;
+        return;
+    }
+
+    rle16_t *const head = &run->runs[nruns_less];
+    const rle16_t *const tail = &run->runs[nruns_less + nruns_common - 1];
+
+    // bounds of the existing runs being merged (read before head is rewritten)
+    const uint32_t common_min = head->value;
+    const uint32_t common_max = tail->value + tail->length;
+    const uint32_t result_min = (common_min < min) ? common_min : min;
+    const uint32_t result_max = (common_max > max) ? common_max : max;
+
+    head->value = result_min;
+    head->length = result_max - result_min;
+
+    // pull the untouched trailing runs right behind the merged run
+    memmove(head + 1,
+            &(run->runs[run->n_runs - nruns_greater]),
+            nruns_greater*sizeof(rle16_t));
+    run->n_runs = nruns_less + 1 + nruns_greater;
+}
+
+/**
+ * Add all values in range [min, max]. This function is currently unused
+ * and left as documentation.
+ */
+/*static inline void run_container_add_range(run_container_t* run,
+ uint32_t min, uint32_t max) {
+ int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
+ int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
+ run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+}*/
+
+/**
+ * Shifts last $count elements either left (distance < 0) or right (distance > 0)
+ *
+ * The run count is adjusted by `distance`. For a right shift the container is
+ * grown first when needed, so that the moved runs fit in the array.
+ */
+static inline void run_container_shift_tail(run_container_t* run,
+                                            int32_t count, int32_t distance) {
+    if (distance > 0) {
+        // after the shift the tail ends at index n_runs + distance, so that is
+        // the capacity required (count + distance is too small whenever
+        // count < n_runs)
+        const int32_t needed = run->n_runs + distance;
+        if (run->capacity < needed) {
+            run_container_grow(run, needed, true);
+        }
+    }
+    int32_t srcpos = run->n_runs - count;
+    int32_t dstpos = srcpos + distance;
+    memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count);
+    run->n_runs += distance;
+}
+
+/**
+ * Remove all elements in range [min, max]
+ *
+ * The runs containing min and max are located with rle16_find_run. If the
+ * range lies strictly inside one run, that run is split in two. Otherwise the
+ * partially covered runs at both ends are trimmed and every run that is
+ * completely covered is deleted.
+ */
+static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) {
+ // index of the run containing min / max, or a negative insertion hint
+ int32_t first = rle16_find_run(run->runs, run->n_runs, min);
+ int32_t last = rle16_find_run(run->runs, run->n_runs, max);
+
+ if (first >= 0 && min > run->runs[first].value &&
+ max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) {
+ // split this run into two adjacent runs
+
+ // right subinterval
+ // (built first, while runs[first] still has its original length)
+ makeRoomAtIndex(run, first+1);
+ run->runs[first+1].value = max + 1;
+ run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1);
+
+ // left subinterval
+ run->runs[first].length = (min - 1) - run->runs[first].value;
+
+ return;
+ }
+
+ // update left-most partial run
+ if (first >= 0) {
+ if (min > run->runs[first].value) {
+ // keep the values below min, then exclude this run from the deletion
+ run->runs[first].length = (min - 1) - run->runs[first].value;
+ first++;
+ }
+ } else {
+ // min is in no run: start at the insertion position reported by the search
+ first = -first-1;
+ }
+
+ // update right-most run
+ if (last >= 0) {
+ uint16_t run_max = run->runs[last].value + run->runs[last].length;
+ if (run_max > max) {
+ // keep the values above max, then exclude this run from the deletion
+ run->runs[last].value = max + 1;
+ run->runs[last].length = run_max - (max + 1);
+ last--;
+ }
+ } else {
+ // max is in no run: stop at the run just before the insertion position
+ last = (-last-1) - 1;
+ }
+
+ // remove intermediate runs
+ if (first <= last) {
+ // move the runs located after `last` left by the number of deleted runs
+ run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1));
+ }
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_RUN_H_ */
+/* end file include/roaring/containers/run.h */
+/* begin file include/roaring/containers/convert.h */
+/*
+ * convert.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_CONVERT_H_
+#define INCLUDE_CONTAINERS_CONVERT_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Convert an array into a bitset. The input container is not freed or modified.
+ */
+bitset_container_t *bitset_container_from_array(const array_container_t *arr);
+
+/* Convert a run into a bitset. The input container is not freed or modified. */
+bitset_container_t *bitset_container_from_run(const run_container_t *arr);
+
+/* Convert a run into an array. The input container is not freed or modified. */
+array_container_t *array_container_from_run(const run_container_t *arr);
+
+/* Convert a bitset into an array. The input container is not freed or modified.
+ */
+array_container_t *array_container_from_bitset(const bitset_container_t *bits);
+
+/* Convert an array into a run. The input container is not freed or modified.
+ */
+run_container_t *run_container_from_array(const array_container_t *c);
+
+/* convert a run into either an array or a bitset
+ * might free the container. This does not free the input run container. */
+container_t *convert_to_bitset_or_array_container(
+ run_container_t *rc, int32_t card,
+ uint8_t *resulttype);
+
+/* convert containers to and from runcontainers, as is most space efficient.
+ * The container might be freed. */
+container_t *convert_run_optimize(
+ container_t *c, uint8_t typecode_original,
+ uint8_t *typecode_after);
+
+/* converts a run container to either an array or a bitset, IF it saves space.
+ */
+/* If a conversion occurs, the caller is responsible for freeing the original
+ * container and becomes responsible for freeing the new one. */
+container_t *convert_run_to_efficient_container(
+ run_container_t *c, uint8_t *typecode_after);
+
+// like convert_run_to_efficient_container but frees the old result if needed
+container_t *convert_run_to_efficient_container_and_free(
+ run_container_t *c, uint8_t *typecode_after);
+
+/**
+ * Create new container which is a union of run container and
+ * range [min, max]. Caller is responsible for freeing run container.
+ */
+container_t *container_from_run_range(
+ const run_container_t *run,
+ uint32_t min, uint32_t max,
+ uint8_t *typecode_after);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */
+/* end file include/roaring/containers/convert.h */
+/* begin file include/roaring/containers/mixed_equal.h */
+/*
+ * mixed_equal.h
+ *
+ */
+
+#ifndef CONTAINERS_MIXED_EQUAL_H_
+#define CONTAINERS_MIXED_EQUAL_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/**
+ * Return true if the two containers have the same content.
+ */
+bool array_container_equal_bitset(const array_container_t* container1,
+ const bitset_container_t* container2);
+
+/**
+ * Return true if the two containers have the same content.
+ */
+bool run_container_equals_array(const run_container_t* container1,
+ const array_container_t* container2);
+/**
+ * Return true if the two containers have the same content.
+ */
+bool run_container_equals_bitset(const run_container_t* container1,
+ const bitset_container_t* container2);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* CONTAINERS_MIXED_EQUAL_H_ */
+/* end file include/roaring/containers/mixed_equal.h */
+/* begin file include/roaring/containers/mixed_subset.h */
+/*
+ * mixed_subset.h
+ *
+ */
+
+#ifndef CONTAINERS_MIXED_SUBSET_H_
+#define CONTAINERS_MIXED_SUBSET_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/**
+ * Return true if container1 is a subset of container2.
+ */
+bool array_container_is_subset_bitset(const array_container_t* container1,
+ const bitset_container_t* container2);
+
+/**
+* Return true if container1 is a subset of container2.
+ */
+bool run_container_is_subset_array(const run_container_t* container1,
+ const array_container_t* container2);
+
+/**
+* Return true if container1 is a subset of container2.
+ */
+bool array_container_is_subset_run(const array_container_t* container1,
+ const run_container_t* container2);
+
+/**
+* Return true if container1 is a subset of container2.
+ */
+bool run_container_is_subset_bitset(const run_container_t* container1,
+ const bitset_container_t* container2);
+
+/**
+* Return true if container1 is a subset of container2.
+*/
+bool bitset_container_is_subset_run(const bitset_container_t* container1,
+ const run_container_t* container2);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* CONTAINERS_MIXED_SUBSET_H_ */
+/* end file include/roaring/containers/mixed_subset.h */
+/* begin file include/roaring/containers/mixed_andnot.h */
+/*
+ * mixed_andnot.h
+ */
+#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_
+#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst, a valid array container that could be the same as src_1.*/
+void array_bitset_container_andnot(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ array_container_t *dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * src_1 */
+
+void array_bitset_container_iandnot(array_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst, which does not initially have a valid container.
+ * Return true for a bitset result; false for array
+ */
+
+bool bitset_array_container_andnot(
+ const bitset_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+bool bitset_array_container_iandnot(
+ bitset_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool run_bitset_container_andnot(
+ const run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool run_bitset_container_iandnot(
+ run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool bitset_run_container_andnot(
+ const bitset_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+bool bitset_run_container_iandnot(
+ bitset_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any type of container.
+ */
+
+int run_array_container_andnot(
+ const run_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+int run_array_container_iandnot(
+ run_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* dst must be a valid array container, allowed to be src_1 */
+
+void array_run_container_andnot(const array_container_t *src_1,
+ const run_container_t *src_2,
+ array_container_t *dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * src_1.
+ */
+
+void array_run_container_iandnot(array_container_t *src_1,
+ const run_container_t *src_2);
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int run_run_container_andnot(
+ const run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+int run_run_container_iandnot(
+ run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/*
+ * dst is a valid array container and may be the same as src_1
+ */
+
+void array_array_container_andnot(const array_container_t *src_1,
+ const array_container_t *src_2,
+ array_container_t *dst);
+
+/* inplace array-array andnot will always be able to reuse the space of
+ * src_1 */
+void array_array_container_iandnot(array_container_t *src_1,
+ const array_container_t *src_2);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). Return value is
+ * "dst is a bitset"
+ */
+
+bool bitset_bitset_container_andnot(
+ const bitset_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+bool bitset_bitset_container_iandnot(
+ bitset_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif
+/* end file include/roaring/containers/mixed_andnot.h */
+/* begin file include/roaring/containers/mixed_intersection.h */
+/*
+ * mixed_intersection.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_
+#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_
+
+/* These functions appear to exclude cases where the
+ * inputs have the same type and the output is guaranteed
+ * to have the same type as the inputs. Eg, array intersection
+ */
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
+ * valid container. */
+void array_bitset_container_intersection(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ array_container_t *dst);
+
+/* Compute the size of the intersection of src_1 and src_2. */
+int array_bitset_container_intersection_cardinality(
+ const array_container_t *src_1, const bitset_container_t *src_2);
+
+
+
+/* Checking whether src_1 and src_2 intersect. */
+bool array_bitset_container_intersect(const array_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/*
+ * Compute the intersection between src_1 and src_2 and write the result
+ * to *dst. If the return value is true, the result is a bitset_container_t;
+ * otherwise it is an array_container_t. We assume that dst is not
+ * pre-allocated. In case of failure, *dst will be NULL.
+ */
+bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the intersection between src_1 and src_2 and write the result to
+ * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
+ * valid container. */
+void array_run_container_intersection(const array_container_t *src_1,
+ const run_container_t *src_2,
+ array_container_t *dst);
+
+/* Compute the intersection between src_1 and src_2 and write the result to
+ * *dst. If the return value is true then the result is a bitset_container_t;
+ * otherwise it is an array_container_t.
+ * If *dst == src_2, then an in-place intersection is attempted
+ **/
+bool run_bitset_container_intersection(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the size of the intersection between src_1 and src_2 . */
+int array_run_container_intersection_cardinality(const array_container_t *src_1,
+ const run_container_t *src_2);
+
+/* Compute the size of the intersection between src_1 and src_2
+ **/
+int run_bitset_container_intersection_cardinality(const run_container_t *src_1,
+ const bitset_container_t *src_2);
+
+
+/* Check that src_1 and src_2 intersect. */
+bool array_run_container_intersect(const array_container_t *src_1,
+ const run_container_t *src_2);
+
+/* Check that src_1 and src_2 intersect.
+ **/
+bool run_bitset_container_intersect(const run_container_t *src_1,
+ const bitset_container_t *src_2);
+
+/*
+ * Same as bitset_bitset_container_intersection except that if the output is to
+ * be a
+ * bitset_container_t, then src_1 is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_bitset_container_intersection_inplace(
+ bitset_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */
+/* end file include/roaring/containers/mixed_intersection.h */
+/* begin file include/roaring/containers/mixed_negation.h */
+/*
+ * mixed_negation.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_
+#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Negation across the entire range of the container.
+ * Compute the negation of src and write the result
+ * to *dst. The complement of a
+ * sufficiently sparse set will always be dense and hence a bitmap.
+ * We assume that dst is pre-allocated and a valid bitset container
+ * There can be no in-place version.
+ */
+void array_container_negation(const array_container_t *src,
+ bitset_container_t *dst);
+
+/* Negation across the entire range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation(
+ const bitset_container_t *src,
+ container_t **dst);
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_inplace(
+ bitset_container_t *src,
+ container_t **dst);
+
+/* Negation across the entire range of container
+ * Compute the negation of src and write the result
+ * to *dst.
+ * Return values are the *_TYPECODES as defined in containers.h
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation(const run_container_t *src, container_t **dst);
+
+/*
+ * Same as run_container_negation except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_inplace(run_container_t *src, container_t **dst);
+
+/* Negation across a range of the container.
+ * Compute the negation of src and write the result
+ * to *dst. Returns true if the result is a bitset container
+ * and false for an array container. *dst is not preallocated.
+ */
+bool array_container_negation_range(
+ const array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+/* Even when the result would fit, it is unclear how to make an
+ * inplace version without inefficient copying. Thus this routine
+ * may be a wrapper for the non-in-place version
+ */
+bool array_container_negation_range_inplace(
+ array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+/* Negation across a range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation_range(
+ const bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation_range except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_range_inplace(
+ bitset_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+/* Negation across a range of container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined in containers.h
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation_range(
+ const run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+/*
+ * Same as run_container_negation_range except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_range_inplace(
+ run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */
+/* end file include/roaring/containers/mixed_negation.h */
+/* begin file include/roaring/containers/mixed_union.h */
+/*
+ * mixed_union.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_
+#define INCLUDE_CONTAINERS_MIXED_UNION_H_
+
+/* These functions appear to exclude cases where the
+ * inputs have the same type and the output is guaranteed
+ * to have the same type as the inputs. Eg, bitset unions
+ */
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. */
+void array_bitset_container_union(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
+void array_bitset_container_lazy_union(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/*
+ * Compute the union between src_1 and src_2 and write the result
+ * to *dst. If the return value is true, the result is a bitset_container_t;
+ * otherwise it is an array_container_t. We assume that dst is not
+ * pre-allocated. In case of failure, *dst will be NULL.
+ */
+bool array_array_container_union(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/*
+ * Compute the union between src_1 and src_2 and write the result
+ * to *dst if it cannot be written to src_1. If the return function is true,
+ * the result is a bitset_container_t
+ * otherwise is an array_container_t. When the result is an array_container_t, it
+ * is either written to src_1 (if *dst is null) or to *dst.
+ * If the result is a bitset_container_t and *dst is null, then there was a failure.
+ */
+bool array_array_container_inplace_union(
+ array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/*
+ * Same as array_array_container_union except that it will more eagerly produce
+ * a bitset.
+ */
+bool array_array_container_lazy_union(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/*
+ * Same as array_array_container_inplace_union except that it will more eagerly produce
+ * a bitset.
+ */
+bool array_array_container_lazy_inplace_union(
+ array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. We assume that dst is a
+ * valid container. The result might need to be further converted to array or
+ * bitset container,
+ * the caller is responsible for the eventual conversion. */
+void array_run_container_union(const array_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst);
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * src_2. The result might need to be further converted to array or
+ * bitset container,
+ * the caller is responsible for the eventual conversion. */
+void array_run_container_inplace_union(const array_container_t *src_1,
+ run_container_t *src_2);
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for dst to be src_2.
+ * If run_container_is_full(src_1) is true, you must not be calling this
+ *function.
+ **/
+void run_bitset_container_union(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for dst to be src_2. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
+ * If run_container_is_full(src_1) is true, you must not be calling this
+ * function.
+ * */
+void run_bitset_container_lazy_union(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */
+/* end file include/roaring/containers/mixed_union.h */
+/* begin file include/roaring/containers/mixed_xor.h */
+/*
+ * mixed_xor.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_
+#define INCLUDE_CONTAINERS_MIXED_XOR_H_
+
+/* These functions appear to exclude cases where the
+ * inputs have the same type and the output is guaranteed
+ * to have the same type as the inputs. Eg, bitset unions
+ */
+
+/*
+ * Java implementation (as of May 2016) for array_run, run_run
+ * and bitset_run don't do anything different for inplace.
+ * (They are not truly in place.)
+ */
+
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst (which has no container initially).
+ * Result is true iff dst is a bitset */
+bool array_bitset_container_xor(
+ const array_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
+ */
+
+void array_bitset_container_lazy_xor(const array_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst (which has no container initially). Return value is
+ * "dst is a bitset"
+ */
+
+bool bitset_bitset_container_xor(
+ const bitset_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool run_bitset_container_xor(
+ const run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* lazy xor. Dst is initialized and may be equal to src_2.
+ * Result is left as a bitset container, even if actual
+ * cardinality would dictate an array container.
+ */
+
+void run_bitset_container_lazy_xor(const run_container_t *src_1,
+ const bitset_container_t *src_2,
+ bitset_container_t *dst);
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int array_run_container_xor(
+ const array_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* dst does not initially have a valid container. Creates either
+ * an array or a bitset container, indicated by return code
+ */
+
+bool array_array_container_xor(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* dst does not initially have a valid container. Creates either
+ * an array or a bitset container, indicated by return code.
+ * A bitset container will not have a valid cardinality and the
+ * container type might not be correct for the actual cardinality
+ */
+
+bool array_array_container_lazy_xor(
+ const array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+/* Dst is a valid run container. (Can it be src_2? Let's say not.)
+ * Leaves result as run container, even if other options are
+ * smaller.
+ */
+
+void array_run_container_lazy_xor(const array_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst);
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int run_run_container_xor(
+ const run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* INPLACE versions (initial implementation may not exploit all inplace
+ * opportunities, if any...)
+ */
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst (which has no container initially). It will modify src_1
+ * to be dst if the result is a bitset. Otherwise, it will
+ * free src_1 and dst will be a new array container. In both
+ * cases, the caller is responsible for deallocating dst.
+ * Returns true iff dst is a bitset */
+
+bool bitset_array_container_ixor(
+ bitset_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+bool bitset_bitset_container_ixor(
+ bitset_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+bool array_bitset_container_ixor(
+ array_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool run_bitset_container_ixor(
+ run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst);
+
+bool bitset_run_container_ixor(
+ bitset_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int array_run_container_ixor(
+ array_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+int run_array_container_ixor(
+ run_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+bool array_array_container_ixor(
+ array_container_t *src_1, const array_container_t *src_2,
+ container_t **dst);
+
+int run_run_container_ixor(
+ run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif
+/* end file include/roaring/containers/mixed_xor.h */
+/* begin file include/roaring/containers/containers.h */
+#ifndef CONTAINERS_CONTAINERS_H
+#define CONTAINERS_CONTAINERS_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#ifndef WIN32
+#include "ndpi_config.h"
+
+#define NDPI_REPLACE_FPRINTF
+#include "../../../include/ndpi_replace_printf.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// would enum be possible or better?
+
+/**
+ * The switch case statements follow
+ * BITSET_CONTAINER_TYPE -- ARRAY_CONTAINER_TYPE -- RUN_CONTAINER_TYPE
+ * so it makes more sense to number them 1, 2, 3 (in the vague hope that the
+ * compiler might exploit this ordering).
+ */
+
+#define BITSET_CONTAINER_TYPE 1
+#define ARRAY_CONTAINER_TYPE 2
+#define RUN_CONTAINER_TYPE 3
+#define SHARED_CONTAINER_TYPE 4
+
+/**
+ * Macros for pairing container type codes, suitable for switch statements.
+ * Use PAIR_CONTAINER_TYPES() for the switch, CONTAINER_PAIR() for the cases:
+ *
+ * switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ * case CONTAINER_PAIR(BITSET,ARRAY):
+ * ...
+ * }
+ */
+#define PAIR_CONTAINER_TYPES(type1,type2) \
+ (4 * (type1) + (type2))
+
+#define CONTAINER_PAIR(name1,name2) \
+ (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
+
+/**
+ * A shared container is a wrapper around a container
+ * with reference counting.
+ *
+ * The accessors in this header treat `typecode` as the type of the wrapped
+ * container and assert that it is never SHARED_CONTAINER_TYPE itself.
+ */
+
+STRUCT_CONTAINER(shared_container_s) {
+ // the wrapped (non-shared) container
+ container_t *container;
+ // typecode of the wrapped container
+ uint8_t typecode;
+ // reference count
+ uint32_t counter; // to be managed atomically
+};
+
+typedef struct shared_container_s shared_container_t;
+
+#define CAST_shared(c) CAST(shared_container_t *, c) // safer downcast
+#define const_CAST_shared(c) CAST(const shared_container_t *, c)
+#define movable_CAST_shared(c) movable_CAST(shared_container_t **, c)
+
+/*
+ * With copy_on_write = true
+ * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE,
+ * otherwise, increase the count
+ * If copy_on_write = false, then clone.
+ * Return NULL in case of failure.
+ **/
+container_t *get_copy_of_container(container_t *container, uint8_t *typecode,
+ bool copy_on_write);
+
+/* Frees a shared container (actually decrement its counter and only frees when
+ * the counter falls to zero). */
+void shared_container_free(shared_container_t *container);
+
+/* extract a copy from the shared container, freeing the shared container if
+there is just one instance left,
+clone instances when the counter is higher than one
+*/
+container_t *shared_container_extract_copy(shared_container_t *container,
+ uint8_t *typecode);
+
+/*
+ * Read-only access to the container underneath a possibly-shared wrapper.
+ *
+ * If *type is SHARED_CONTAINER_TYPE, *type is overwritten with the wrapped
+ * container's typecode and the wrapped container is returned. Otherwise the
+ * candidate pointer is returned as-is and *type is left untouched.
+ */
+static inline const container_t *container_unwrap_shared(
+    const container_t *candidate_shared_container, uint8_t *type
+){
+    if (*type != SHARED_CONTAINER_TYPE) {
+        return candidate_shared_container;  // not a wrapper, nothing to peel
+    }
+    const shared_container_t *wrapper =
+        const_CAST_shared(candidate_shared_container);
+    *type = wrapper->typecode;
+    assert(*type != SHARED_CONTAINER_TYPE);  // a wrapper never wraps a wrapper
+    return wrapper->container;
+}
+
+
+/*
+ * Mutable access to the container underneath a possibly-shared wrapper.
+ *
+ * If *type is SHARED_CONTAINER_TYPE, *type is overwritten with the wrapped
+ * container's typecode and the enclosed container is returned. Otherwise c
+ * is returned as-is and *type is left untouched.
+ */
+static inline container_t *container_mutable_unwrap_shared(
+    container_t *c, uint8_t *type
+) {
+    if (*type != SHARED_CONTAINER_TYPE) {
+        return c;  // wasn't shared, hand it back unchanged
+    }
+    shared_container_t *wrapper = CAST_shared(c);
+    *type = wrapper->typecode;
+    assert(*type != SHARED_CONTAINER_TYPE);  // a wrapper never wraps a wrapper
+    return wrapper->container;
+}
+
+/*
+ * Report the effective typecode of a container: for a shared wrapper this is
+ * the typecode stored in the wrapper, for anything else it is `type` itself.
+ * Unlike the unwrap helpers, nothing is modified.
+ */
+static inline uint8_t get_container_type(
+    const container_t *c, uint8_t type
+){
+    return (type == SHARED_CONTAINER_TYPE) ? const_CAST_shared(c)->typecode
+                                           : type;
+}
+
+/**
+ * Copies a container, requires a typecode. This allocates new memory, caller
+ * is responsible for deallocation. If the container is not shared, then it is
+ * physically cloned. Sharable containers are not cloneable.
+ */
+container_t *container_clone(const container_t *container, uint8_t typecode);
+
+/*
+ * Obtain a container that may be written to.
+ *
+ * A non-shared container is returned unchanged. A shared one is passed to
+ * shared_container_extract_copy(), which also receives `type` so it can
+ * report the typecode of the container it hands back.
+ */
+static inline container_t *get_writable_copy_if_shared(
+    container_t *c, uint8_t *type
+){
+    if (*type != SHARED_CONTAINER_TYPE) {
+        return c;  // not shared, already writable
+    }
+    return shared_container_extract_copy(CAST_shared(c), type);
+}
+
+/**
+ * End of shared container code
+ */
+
+// Human-readable names returned by get_full_container_name(): index 0 is
+// bitset, 1 is array, 2 is run, 3 is shared.
+static const char *container_names[] = {"bitset", "array", "run", "shared"};
+// Names used when the container is a shared wrapper, indexed the same way by
+// the type of the wrapped container (bitset, array, run).
+static const char *shared_container_names[] = {
+ "bitset (shared)", "array (shared)", "run (shared)"};
+
+// Convert any non-shared container into a bitset container.
+// A bitset input is returned as-is (cast only). An array or run input yields
+// the result of the matching bitset_container_from_*() helper; the input is
+// not freed here, so the caller is responsible for freeing the previous one.
+// Passing a shared container (or an unknown typecode) is a caller error.
+static inline bitset_container_t *container_to_bitset(
+    container_t *c, uint8_t typecode
+){
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return CAST_bitset(c);  // already a bitset
+        case ARRAY_CONTAINER_TYPE:
+            return bitset_container_from_array(CAST_array(c));
+        case RUN_CONTAINER_TYPE:
+            return bitset_container_from_run(CAST_run(c));
+        case SHARED_CONTAINER_TYPE:
+        default:
+            break;  // falls into the failure path below
+    }
+    assert(false);
+    __builtin_unreachable();
+    return 0;  // unreached
+}
+
+/**
+ * Get the container name from the typecode
+ * (unused at time of writing)
+ */
+/*static inline const char *get_container_name(uint8_t typecode) {
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE:
+ return container_names[0];
+ case ARRAY_CONTAINER_TYPE:
+ return container_names[1];
+ case RUN_CONTAINER_TYPE:
+ return container_names[2];
+ case SHARED_CONTAINER_TYPE:
+ return container_names[3];
+ default:
+ assert(false);
+ __builtin_unreachable();
+ return "unknown";
+ }
+}*/
+
+/**
+ * Get a printable name for a container given its typecode.
+ * For a shared wrapper the name reflects the wrapped container's type
+ * ("bitset (shared)", "array (shared)", "run (shared)"); otherwise it is one
+ * of "bitset", "array" or "run". Any other typecode is a caller error.
+ */
+static inline const char *get_full_container_name(
+    const container_t *c, uint8_t typecode
+){
+    if (typecode == SHARED_CONTAINER_TYPE) {
+        // look through the wrapper to name the enclosed container
+        switch (const_CAST_shared(c)->typecode) {
+            case BITSET_CONTAINER_TYPE:
+                return shared_container_names[0];
+            case ARRAY_CONTAINER_TYPE:
+                return shared_container_names[1];
+            case RUN_CONTAINER_TYPE:
+                return shared_container_names[2];
+            default:
+                assert(false);
+                __builtin_unreachable();
+                return "unknown";
+        }
+    }
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return container_names[0];
+        case ARRAY_CONTAINER_TYPE:
+            return container_names[1];
+        case RUN_CONTAINER_TYPE:
+            return container_names[2];
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return "unknown";
+    }
+    __builtin_unreachable();
+    return NULL;
+}
+
+/**
+ * Number of elements stored in a container. A shared wrapper is looked
+ * through first; the count then comes from the type-specific
+ * *_container_cardinality() helper. Requires a typecode.
+ */
+static inline int container_get_cardinality(
+    const container_t *c, uint8_t typecode
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_cardinality(const_CAST_bitset(inner));
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_cardinality(const_CAST_array(inner));
+        case RUN_CONTAINER_TYPE:
+            return run_container_cardinality(const_CAST_run(inner));
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+
+
+// Returns true if a container is known to hold all 1 << 16 values.
+// A shared wrapper is looked through first. Bitset and array containers are
+// judged by their reported cardinality, run containers by
+// run_container_is_full(). Note that a lazy bitset container might be full
+// without us knowing.
+static inline bool container_is_full(const container_t *c, uint8_t typecode) {
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE: {
+            const int count =
+                bitset_container_cardinality(const_CAST_bitset(inner));
+            return count == (1 << 16);
+        }
+        case ARRAY_CONTAINER_TYPE: {
+            const int count =
+                array_container_cardinality(const_CAST_array(inner));
+            return count == (1 << 16);
+        }
+        case RUN_CONTAINER_TYPE:
+            return run_container_is_full(const_CAST_run(inner));
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+/**
+ * Ask a container to release unused capacity. A shared wrapper is looked
+ * through first. Bitset containers cannot shrink and always yield 0; array
+ * and run containers return whatever their *_shrink_to_fit() helper reports
+ * (presumably the number of bytes saved -- confirm against the helpers).
+ */
+static inline int container_shrink_to_fit(
+    container_t *c, uint8_t type
+){
+    container_t *inner = container_mutable_unwrap_shared(c, &type);
+    switch (type) {
+        case BITSET_CONTAINER_TYPE:
+            return 0;  // no shrinking possible
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_shrink_to_fit(CAST_array(inner));
+        case RUN_CONTAINER_TYPE:
+            return run_container_shrink_to_fit(CAST_run(inner));
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+
+/**
+ * Make a container holding a run of ones between range_start and range_end.
+ *
+ * The count is computed as range_end - range_start + 1. When it is at most 2
+ * an array container is created, otherwise a run container is used (even if
+ * an array might be marginally smaller). *result_type receives the typecode
+ * of the container that is returned.
+ *
+ * NOTE(review): whether range_end is inclusive or exclusive is decided by
+ * the *_container_create_range() helpers, which are not visible here --
+ * confirm before relying on the exact bounds.
+ */
+static inline container_t *container_range_of_ones(
+    uint32_t range_start, uint32_t range_end,
+    uint8_t *result_type
+){
+    assert(range_end >= range_start);
+    const uint64_t span = range_end - range_start + 1;
+    if (span > 2) {
+        *result_type = RUN_CONTAINER_TYPE;
+        return run_container_create_range(range_start, range_end);
+    }
+    // one or two values: an array is the cheaper representation
+    *result_type = ARRAY_CONTAINER_TYPE;
+    return array_container_create_range(range_start, range_end);
+}
+
+
+/* Create a container with all the values in [min,max) that lie at a
+ * distance k*step from min.
+ *
+ * Returns NULL when step is 0. A step of 1 is delegated to
+ * container_range_of_ones(), so the result is not always a run container.
+ * Otherwise the element count decides: up to DEFAULT_MAX_SIZE values go into
+ * an array container, more go into a bitset container. *type receives the
+ * typecode of the returned container. */
+static inline container_t *container_from_range(
+    uint8_t *type, uint32_t min,
+    uint32_t max, uint16_t step
+){
+    if (step == 0) {
+        return NULL;  // being paranoid
+    }
+    if (step == 1) {
+        // dense range: the helper picks the representation and sets *type
+        return container_range_of_ones(min, max, type);
+    }
+    int size = (max - min + step - 1) / step;  // ceil((max - min) / step)
+    if (size > DEFAULT_MAX_SIZE) {
+        // too many values for an array: use a bitset container
+        *type = BITSET_CONTAINER_TYPE;
+        bitset_container_t *bits = bitset_container_create();
+        bitset_container_add_from_range(bits, min, max, step);
+        assert(bits->cardinality == size);
+        return bits;
+    }
+    *type = ARRAY_CONTAINER_TYPE;
+    array_container_t *values = array_container_create_given_capacity(size);
+    array_container_add_from_range(values, min, max, step);
+    assert(values->cardinality == size);
+    return values;
+}
+
+/**
+ * "Repair" a container after lazy operations.
+ *
+ * A shared container is first replaced by a writable copy. Then:
+ *  - bitset: the cardinality is recomputed and stored; if it does not exceed
+ *    DEFAULT_MAX_SIZE the bitset is converted to an array container, the
+ *    bitset is freed and *type becomes ARRAY_CONTAINER_TYPE;
+ *  - array: returned unchanged;
+ *  - run: handed to convert_run_to_efficient_container_and_free(), which
+ *    also receives `type`.
+ * Returns the container to use from now on.
+ */
+static inline container_t *container_repair_after_lazy(
+    container_t *c, uint8_t *type
+){
+    c = get_writable_copy_if_shared(c, type);  // !!! unnecessary cloning
+    switch (*type) {
+        case BITSET_CONTAINER_TYPE: {
+            bitset_container_t *bits = CAST_bitset(c);
+            bits->cardinality = bitset_container_compute_cardinality(bits);
+            if (bits->cardinality > DEFAULT_MAX_SIZE) {
+                return c;  // still dense enough to stay a bitset
+            }
+            container_t *as_array = array_container_from_bitset(bits);
+            bitset_container_free(bits);
+            *type = ARRAY_CONTAINER_TYPE;
+            return as_array;
+        }
+        case ARRAY_CONTAINER_TYPE:
+            return c;  // nothing to do
+        case RUN_CONTAINER_TYPE:
+            return convert_run_to_efficient_container_and_free(
+                CAST_run(c), type);
+        case SHARED_CONTAINER_TYPE:
+        default:
+            break;  // cannot happen after the writable-copy step above
+    }
+    assert(false);
+    __builtin_unreachable();
+    return 0;  // unreached
+}
+
+/**
+ * Writes the underlying array to buf and returns how many bytes were written.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions
+ * of Roaring. A shared wrapper is looked through first; the actual writing is
+ * done by the type-specific *_container_write() helper.
+ *
+ * NOTE(review): the previous comment said the byte count "should be
+ * container_write(container, buf)"; presumably container_size_in_bytes()
+ * was meant -- confirm.
+ */
+static inline int32_t container_write(
+    const container_t *c, uint8_t typecode,
+    char *buf
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_write(const_CAST_bitset(inner), buf);
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_write(const_CAST_array(inner), buf);
+        case RUN_CONTAINER_TYPE:
+            return run_container_write(const_CAST_run(inner), buf);
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+/**
+ * Get the container size in bytes under portable serialization (see
+ * container_write). Requires a typecode; a shared wrapper is looked through
+ * and the answer comes from the type-specific *_container_size_in_bytes()
+ * helper.
+ */
+static inline int32_t container_size_in_bytes(
+    const container_t *c, uint8_t typecode
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_size_in_bytes(const_CAST_bitset(inner));
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_size_in_bytes(const_CAST_array(inner));
+        case RUN_CONTAINER_TYPE:
+            return run_container_size_in_bytes(const_CAST_run(inner));
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * print the container (useful for debugging), requires a typecode
+ */
+void container_printf(const container_t *container, uint8_t typecode);
+
+/**
+ * print the content of the container as a comma-separated list of 32-bit values
+ * starting at base, requires a typecode
+ */
+void container_printf_as_uint32_array(const container_t *container,
+ uint8_t typecode, uint32_t base);
+#endif
+
+/**
+ * Checks whether a container is not empty. Requires a typecode; a shared
+ * wrapper is looked through and the check is delegated to the type-specific
+ * *_nonzero_cardinality() helper.
+ */
+static inline bool container_nonzero_cardinality(
+    const container_t *c, uint8_t typecode
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_const_nonzero_cardinality(
+                const_CAST_bitset(inner));
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_nonzero_cardinality(
+                const_CAST_array(inner));
+        case RUN_CONTAINER_TYPE:
+            return run_container_nonzero_cardinality(const_CAST_run(inner));
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+/**
+ * Recover memory from a container, requires a typecode
+ */
+void container_free(container_t *container, uint8_t typecode);
+
+/**
+ * Convert a container to an array of 32-bit values written to `output`.
+ * Requires a typecode as well as a "base" (most significant values), which
+ * is forwarded to the type-specific *_container_to_uint32_array() helper.
+ * A shared wrapper is looked through first.
+ * Returns number of ints added.
+ */
+static inline int container_to_uint32_array(
+    uint32_t *output,
+    const container_t *c, uint8_t typecode,
+    uint32_t base
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_to_uint32_array(
+                output, const_CAST_bitset(inner), base);
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_to_uint32_array(
+                output, const_CAST_array(inner), base);
+        case RUN_CONTAINER_TYPE:
+            return run_container_to_uint32_array(
+                output, const_CAST_run(inner), base);
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;  // unreached
+    }
+}
+
+/**
+ * Inserts 'val' into a container. Requires the container's typecode, stores
+ * the typecode of the returned container in *new_typecode, and returns the
+ * (possibly different) container.
+ * This function may allocate a new container, and the caller is responsible
+ * for memory deallocation.
+ */
+static inline container_t *container_add(
+    container_t *c, uint16_t val,
+    uint8_t typecode,  // !!! should be second argument?
+    uint8_t *new_typecode
+){
+    c = get_writable_copy_if_shared(c, &typecode);
+
+    if (typecode == BITSET_CONTAINER_TYPE) {
+        bitset_container_set(CAST_bitset(c), val);
+        *new_typecode = BITSET_CONTAINER_TYPE;
+        return c;
+    }
+
+    if (typecode == ARRAY_CONTAINER_TYPE) {
+        array_container_t *arr = CAST_array(c);
+        if (array_container_try_add(arr, val, DEFAULT_MAX_SIZE) == -1) {
+            // try_add reported -1: switch to a bitset built from the array
+            // and insert the value there instead.
+            bitset_container_t *promoted = bitset_container_from_array(arr);
+            bitset_container_add(promoted, val);
+            *new_typecode = BITSET_CONTAINER_TYPE;
+            return promoted;
+        }
+        *new_typecode = ARRAY_CONTAINER_TYPE;
+        return arr;
+    }
+
+    if (typecode == RUN_CONTAINER_TYPE) {
+        // per Java, no container type adjustments are done (revisit?)
+        run_container_add(CAST_run(c), val);
+        *new_typecode = RUN_CONTAINER_TYPE;
+        return c;
+    }
+
+    // Any other typecode is treated as a programming error.
+    assert(false);
+    __builtin_unreachable();
+    return NULL;
+}
+
+/**
+ * Deletes 'val' from a container. Requires the container's typecode, stores
+ * the typecode of the returned container in *new_typecode, and returns the
+ * (possibly different) container.
+ * This function may allocate a new container, and the caller is responsible
+ * for memory deallocation.
+ */
+static inline container_t *container_remove(
+    container_t *c, uint16_t val,
+    uint8_t typecode,  // !!! should be second argument?
+    uint8_t *new_typecode
+){
+    c = get_writable_copy_if_shared(c, &typecode);
+
+    if (typecode == BITSET_CONTAINER_TYPE) {
+        bitset_container_t *bits = CAST_bitset(c);
+        if (bitset_container_remove(bits, val)) {
+            const int remaining = bitset_container_cardinality(bits);
+            if (remaining <= DEFAULT_MAX_SIZE) {
+                // small enough after the removal: return an array container
+                // built from the bitset
+                *new_typecode = ARRAY_CONTAINER_TYPE;
+                return array_container_from_bitset(bits);
+            }
+        }
+        *new_typecode = typecode;
+        return c;
+    }
+
+    if (typecode == ARRAY_CONTAINER_TYPE) {
+        *new_typecode = typecode;
+        array_container_remove(CAST_array(c), val);
+        return c;
+    }
+
+    if (typecode == RUN_CONTAINER_TYPE) {
+        // per Java, no container type adjustments are done (revisit?)
+        run_container_remove(CAST_run(c), val);
+        *new_typecode = RUN_CONTAINER_TYPE;
+        return c;
+    }
+
+    // Any other typecode is treated as a programming error.
+    assert(false);
+    __builtin_unreachable();
+    return NULL;
+}
+
+/**
+ * Membership test: tells whether 'val' is in a container, requires a typecode.
+ */
+static inline bool container_contains(
+    const container_t *c,
+    uint16_t val,
+    uint8_t typecode  // !!! should be second argument?
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+
+    if (typecode == BITSET_CONTAINER_TYPE) {
+        return bitset_container_get(const_CAST_bitset(inner), val);
+    }
+    if (typecode == ARRAY_CONTAINER_TYPE) {
+        return array_container_contains(const_CAST_array(inner), val);
+    }
+    if (typecode == RUN_CONTAINER_TYPE) {
+        return run_container_contains(const_CAST_run(inner), val);
+    }
+
+    // Any other typecode is treated as a programming error.
+    assert(false);
+    __builtin_unreachable();
+    return false;
+}
+
+/**
+ * Tells whether every value from range_start (included) to range_end
+ * (excluded) is in a container, requires a typecode.
+ */
+static inline bool container_contains_range(
+    const container_t *c,
+    uint32_t range_start, uint32_t range_end,
+    uint8_t typecode  // !!! should be second argument?
+){
+    const container_t *inner = container_unwrap_shared(c, &typecode);
+
+    if (typecode == BITSET_CONTAINER_TYPE) {
+        return bitset_container_get_range(const_CAST_bitset(inner),
+                                          range_start, range_end);
+    }
+    if (typecode == ARRAY_CONTAINER_TYPE) {
+        return array_container_contains_range(const_CAST_array(inner),
+                                              range_start, range_end);
+    }
+    if (typecode == RUN_CONTAINER_TYPE) {
+        return run_container_contains_range(const_CAST_run(inner),
+                                            range_start, range_end);
+    }
+
+    // Any other typecode is treated as a programming error.
+    assert(false);
+    __builtin_unreachable();
+    return false;
+}
+
+/**
+ * Content equality between two containers. Returns true when both hold the
+ * same values; two containers of different types can be "equal" in this
+ * sense.
+ */
+static inline bool container_equals(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    bool same = false;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        /* both operands of the same kind */
+        case CONTAINER_PAIR(BITSET,BITSET):
+            same = bitset_container_equals(const_CAST_bitset(lhs),
+                                           const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            same = array_container_equals(const_CAST_array(lhs),
+                                          const_CAST_array(rhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            same = run_container_equals(const_CAST_run(lhs),
+                                        const_CAST_run(rhs));
+            break;
+
+        /* run vs bitset: the helper takes the run container first */
+        case CONTAINER_PAIR(BITSET,RUN):
+            same = run_container_equals_bitset(const_CAST_run(rhs),
+                                               const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            same = run_container_equals_bitset(const_CAST_run(lhs),
+                                               const_CAST_bitset(rhs));
+            break;
+
+        /* array vs bitset: the helper takes the array container first */
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            // java would always return false?
+            same = array_container_equal_bitset(const_CAST_array(rhs),
+                                                const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            // java would always return false?
+            same = array_container_equal_bitset(const_CAST_array(lhs),
+                                                const_CAST_bitset(rhs));
+            break;
+
+        /* run vs array: the helper takes the run container first */
+        case CONTAINER_PAIR(ARRAY,RUN):
+            same = run_container_equals_array(const_CAST_run(rhs),
+                                              const_CAST_array(lhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            same = run_container_equals_array(const_CAST_run(lhs),
+                                              const_CAST_array(rhs));
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return false;
+    }
+
+    return same;
+}
+
+/**
+ * Subset test: returns true if container c1 is a subset of container c2.
+ * c1 can be a subset of c2 even if the two have different types.
+ */
+static inline bool container_is_subset(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2
+){
+    const container_t *inner = container_unwrap_shared(c1, &type1);
+    const container_t *outer = container_unwrap_shared(c2, &type2);
+    bool contained = false;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        /* candidate subset is a bitset */
+        case CONTAINER_PAIR(BITSET,BITSET):
+            contained = bitset_container_is_subset(const_CAST_bitset(inner),
+                                                   const_CAST_bitset(outer));
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            contained = bitset_container_is_subset_run(
+                const_CAST_bitset(inner), const_CAST_run(outer));
+            break;
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            contained = false;  // by construction, size(c1) > size(c2)
+            break;
+
+        /* candidate subset is an array */
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            contained = array_container_is_subset(const_CAST_array(inner),
+                                                  const_CAST_array(outer));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            contained = array_container_is_subset_bitset(
+                const_CAST_array(inner), const_CAST_bitset(outer));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            contained = array_container_is_subset_run(
+                const_CAST_array(inner), const_CAST_run(outer));
+            break;
+
+        /* candidate subset is a run container */
+        case CONTAINER_PAIR(RUN,RUN):
+            contained = run_container_is_subset(const_CAST_run(inner),
+                                                const_CAST_run(outer));
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            contained = run_container_is_subset_bitset(
+                const_CAST_run(inner), const_CAST_bitset(outer));
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            contained = run_container_is_subset_array(
+                const_CAST_run(inner), const_CAST_array(outer));
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return false;
+    }
+
+    return contained;
+}
+
+// macro-ization possibilities for generic non-in-place binary-op dispatch
+
+/**
+ * Intersection of two containers, returned as a new container whose type is
+ * stored in *result_type. Requires the typecode of each operand. This
+ * allocates new memory; the caller is responsible for deallocation.
+ */
+static inline container_t *container_and(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    container_t *out = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            // the helper's boolean result selects bitset vs array output
+            if (bitset_bitset_container_intersection(
+                    const_CAST_bitset(lhs), const_CAST_bitset(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            out = array_container_create();
+            array_container_intersection(const_CAST_array(lhs),
+                                         const_CAST_array(rhs),
+                                         CAST_array(out));
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            out = run_container_create();
+            run_container_intersection(const_CAST_run(lhs),
+                                       const_CAST_run(rhs),
+                                       CAST_run(out));
+            // the conversion helper fills in *result_type
+            out = convert_run_to_efficient_container_and_free(
+                CAST_run(out), result_type);
+            break;
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            out = array_container_create();
+            array_bitset_container_intersection(const_CAST_array(rhs),
+                                                const_CAST_bitset(lhs),
+                                                CAST_array(out));
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            out = array_container_create();
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            array_bitset_container_intersection(const_CAST_array(lhs),
+                                                const_CAST_bitset(rhs),
+                                                CAST_array(out));
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_bitset_container_intersection(
+                    const_CAST_run(rhs), const_CAST_bitset(lhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_bitset_container_intersection(
+                    const_CAST_run(lhs), const_CAST_bitset(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            out = array_container_create();
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            array_run_container_intersection(const_CAST_array(lhs),
+                                             const_CAST_run(rhs),
+                                             CAST_array(out));
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            out = array_container_create();
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            array_run_container_intersection(const_CAST_array(rhs),
+                                             const_CAST_run(lhs),
+                                             CAST_array(out));
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;
+    }
+
+    return out;
+}
+
+/**
+ * Size of the intersection between two containers, computed by the
+ * cardinality-only helper matching the pair of container types.
+ */
+static inline int container_and_cardinality(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    int common = 0;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            common = bitset_container_and_justcard(
+                const_CAST_bitset(lhs), const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            common = array_container_intersection_cardinality(
+                const_CAST_array(lhs), const_CAST_array(rhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            common = run_container_intersection_cardinality(
+                const_CAST_run(lhs), const_CAST_run(rhs));
+            break;
+
+        /* mixed pairs: each helper has a fixed argument order */
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            common = array_bitset_container_intersection_cardinality(
+                const_CAST_array(rhs), const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            common = array_bitset_container_intersection_cardinality(
+                const_CAST_array(lhs), const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            common = run_bitset_container_intersection_cardinality(
+                const_CAST_run(rhs), const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            common = run_bitset_container_intersection_cardinality(
+                const_CAST_run(lhs), const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            common = array_run_container_intersection_cardinality(
+                const_CAST_array(lhs), const_CAST_run(rhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            common = array_run_container_intersection_cardinality(
+                const_CAST_array(rhs), const_CAST_run(lhs));
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return 0;
+    }
+
+    return common;
+}
+
+/**
+ * Tells whether two containers intersect, using the helper that matches the
+ * pair of container types.
+ */
+static inline bool container_intersect(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    bool overlap = false;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            overlap = bitset_container_intersect(const_CAST_bitset(lhs),
+                                                 const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            overlap = array_container_intersect(const_CAST_array(lhs),
+                                                const_CAST_array(rhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            overlap = run_container_intersect(const_CAST_run(lhs),
+                                              const_CAST_run(rhs));
+            break;
+
+        /* mixed pairs: each helper has a fixed argument order */
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            overlap = array_bitset_container_intersect(
+                const_CAST_array(rhs), const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            overlap = array_bitset_container_intersect(
+                const_CAST_array(lhs), const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            overlap = run_bitset_container_intersect(
+                const_CAST_run(rhs), const_CAST_bitset(lhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            overlap = run_bitset_container_intersect(
+                const_CAST_run(lhs), const_CAST_bitset(rhs));
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            overlap = array_run_container_intersect(
+                const_CAST_array(lhs), const_CAST_run(rhs));
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            overlap = array_run_container_intersect(
+                const_CAST_array(rhs), const_CAST_run(lhs));
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return false;
+    }
+
+    return overlap;
+}
+
+/**
+ * Intersection of two containers, with the result placed in the first
+ * container if possible.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change; the type of the returned
+ * container is stored in *result_type. Returns the modified (and possibly
+ * new) container.
+ */
+static inline container_t *container_iand(
+    container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    container_t *dst = get_writable_copy_if_shared(c1, &type1);
+    const container_t *src = container_unwrap_shared(c2, &type2);
+    container_t *fresh = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            if (bitset_bitset_container_intersection_inplace(
+                    CAST_bitset(dst), const_CAST_bitset(src), &fresh)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            return fresh;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            array_container_intersection_inplace(CAST_array(dst),
+                                                 const_CAST_array(src));
+            *result_type = ARRAY_CONTAINER_TYPE;
+            return dst;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            // as of January 2016, Java code used non-in-place intersection
+            // for two runcontainers
+            fresh = run_container_create();
+            run_container_intersection(const_CAST_run(dst),
+                                       const_CAST_run(src),
+                                       CAST_run(fresh));
+            return convert_run_to_efficient_container_and_free(
+                CAST_run(fresh), result_type);
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            // c1 is a bitmap so no inplace possible
+            fresh = array_container_create();
+            array_bitset_container_intersection(const_CAST_array(src),
+                                                const_CAST_bitset(dst),
+                                                CAST_array(fresh));
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            return fresh;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            // the output is allowed to be the same container as c1
+            array_bitset_container_intersection(const_CAST_array(dst),
+                                                const_CAST_bitset(src),
+                                                CAST_array(dst));
+            return dst;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            // will attempt in-place computation: the helper receives the
+            // address of the destination pointer and may replace it
+            if (run_bitset_container_intersection(
+                    const_CAST_run(src), const_CAST_bitset(dst), &dst)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            return dst;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_bitset_container_intersection(
+                    const_CAST_run(dst), const_CAST_bitset(src), &fresh)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            return fresh;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            fresh = array_container_create();
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            array_run_container_intersection(const_CAST_array(dst),
+                                             const_CAST_run(src),
+                                             CAST_array(fresh));
+            return fresh;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            fresh = array_container_create();
+            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
+            array_run_container_intersection(const_CAST_array(src),
+                                             const_CAST_run(dst),
+                                             CAST_array(fresh));
+            return fresh;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;
+    }
+}
+
+/**
+ * Union of two containers, returned as a new container whose type is stored
+ * in *result_type. Requires the typecode of each operand. This allocates new
+ * memory; the caller is responsible for deallocation.
+ */
+static inline container_t *container_or(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    container_t *out = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            out = bitset_container_create();
+            bitset_container_or(const_CAST_bitset(lhs),
+                                const_CAST_bitset(rhs),
+                                CAST_bitset(out));
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            // the helper's boolean result selects bitset vs array output
+            if (array_array_container_union(
+                    const_CAST_array(lhs), const_CAST_array(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            out = run_container_create();
+            run_container_union(const_CAST_run(lhs),
+                                const_CAST_run(rhs),
+                                CAST_run(out));
+            *result_type = RUN_CONTAINER_TYPE;
+            // todo: could be optimized since will never convert to array
+            out = convert_run_to_efficient_container_and_free(
+                CAST_run(out), result_type);
+            break;
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            out = bitset_container_create();
+            array_bitset_container_union(const_CAST_array(rhs),
+                                         const_CAST_bitset(lhs),
+                                         CAST_bitset(out));
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            out = bitset_container_create();
+            array_bitset_container_union(const_CAST_array(lhs),
+                                         const_CAST_bitset(rhs),
+                                         CAST_bitset(out));
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_container_is_full(const_CAST_run(rhs))) {
+                // a full run operand: the result is a copy of it
+                out = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(rhs), CAST_run(out));
+            } else {
+                out = bitset_container_create();
+                run_bitset_container_union(const_CAST_run(rhs),
+                                           const_CAST_bitset(lhs),
+                                           CAST_bitset(out));
+                *result_type = BITSET_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_container_is_full(const_CAST_run(lhs))) {
+                // a full run operand: the result is a copy of it
+                out = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(lhs), CAST_run(out));
+            } else {
+                out = bitset_container_create();
+                run_bitset_container_union(const_CAST_run(lhs),
+                                           const_CAST_bitset(rhs),
+                                           CAST_bitset(out));
+                *result_type = BITSET_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            out = run_container_create();
+            array_run_container_union(const_CAST_array(lhs),
+                                      const_CAST_run(rhs),
+                                      CAST_run(out));
+            out = convert_run_to_efficient_container_and_free(
+                CAST_run(out), result_type);
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            out = run_container_create();
+            array_run_container_union(const_CAST_array(rhs),
+                                      const_CAST_run(lhs),
+                                      CAST_run(out));
+            out = convert_run_to_efficient_container_and_free(
+                CAST_run(out), result_type);
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;  // unreached
+    }
+
+    return out;
+}
+
+/**
+ * Union of two containers, returned as a new container whose type is stored
+ * in *result_type. Requires the typecode of each operand. This allocates new
+ * memory; the caller is responsible for deallocation.
+ *
+ * This lazy version delays some operations such as the maintenance of the
+ * cardinality. The generated containers require repair later on.
+ */
+static inline container_t *container_lazy_or(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    container_t *out = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            out = bitset_container_create();
+            bitset_container_or_nocard(const_CAST_bitset(lhs),
+                                       const_CAST_bitset(rhs),
+                                       CAST_bitset(out));  // is lazy
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            // the helper's boolean result selects bitset vs array output
+            if (array_array_container_lazy_union(
+                    const_CAST_array(lhs), const_CAST_array(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,RUN):
+            out = run_container_create();
+            run_container_union(const_CAST_run(lhs),
+                                const_CAST_run(rhs),
+                                CAST_run(out));
+            *result_type = RUN_CONTAINER_TYPE;
+            // we are being lazy
+            out = convert_run_to_efficient_container_and_free(
+                CAST_run(out), result_type);
+            break;
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            out = bitset_container_create();
+            array_bitset_container_lazy_union(const_CAST_array(rhs),
+                                              const_CAST_bitset(lhs),
+                                              CAST_bitset(out));  // is lazy
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            out = bitset_container_create();
+            array_bitset_container_lazy_union(const_CAST_array(lhs),
+                                              const_CAST_bitset(rhs),
+                                              CAST_bitset(out));  // is lazy
+            *result_type = BITSET_CONTAINER_TYPE;
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_container_is_full(const_CAST_run(rhs))) {
+                // a full run operand: the result is a copy of it
+                out = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(rhs), CAST_run(out));
+            } else {
+                out = bitset_container_create();
+                run_bitset_container_lazy_union(
+                    const_CAST_run(rhs), const_CAST_bitset(lhs),
+                    CAST_bitset(out));  // is lazy
+                *result_type = BITSET_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_container_is_full(const_CAST_run(lhs))) {
+                // a full run operand: the result is a copy of it
+                out = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(lhs), CAST_run(out));
+            } else {
+                out = bitset_container_create();
+                run_bitset_container_lazy_union(
+                    const_CAST_run(lhs), const_CAST_bitset(rhs),
+                    CAST_bitset(out));  // is lazy
+                *result_type = BITSET_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            out = run_container_create();
+            array_run_container_union(const_CAST_array(lhs),
+                                      const_CAST_run(rhs),
+                                      CAST_run(out));
+            *result_type = RUN_CONTAINER_TYPE;
+            // conversion to an efficient container is skipped since we are
+            // lazy:
+            // result = convert_run_to_efficient_container(result, result_type);
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            out = run_container_create();
+            array_run_container_union(const_CAST_array(rhs),
+                                      const_CAST_run(lhs),
+                                      CAST_run(out));  // TODO make lazy
+            *result_type = RUN_CONTAINER_TYPE;
+            // conversion to an efficient container is skipped since we are
+            // lazy:
+            // result = convert_run_to_efficient_container(result, result_type);
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;  // unreached
+    }
+
+    return out;
+}
+
+/**
+ * Union of two containers, with the result placed in the first container.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change; the type of the returned
+ * container is stored in *result_type. Returns the modified (and possibly
+ * new) container.
+ */
+static inline container_t *container_ior(
+    container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    container_t *dst = get_writable_copy_if_shared(c1, &type1);
+    const container_t *src = container_unwrap_shared(c2, &type2);
+    container_t *fresh = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+            bitset_container_or(const_CAST_bitset(dst),
+                                const_CAST_bitset(src),
+                                CAST_bitset(dst));
+#ifdef OR_BITSET_CONVERSION_TO_FULL
+            if (CAST_bitset(dst)->cardinality == (1 << 16)) {  // we convert
+                fresh = run_container_create_range(0, (1 << 16));
+                *result_type = RUN_CONTAINER_TYPE;
+                return fresh;
+            }
+#endif
+            *result_type = BITSET_CONTAINER_TYPE;
+            return dst;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY): {
+            const bool became_bitset = array_array_container_inplace_union(
+                CAST_array(dst), const_CAST_array(src), &fresh);
+            if (became_bitset) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            if (!became_bitset && (fresh == NULL)) {
+                return dst;  // the computation was done in-place!
+            }
+            return fresh;
+        }
+
+        case CONTAINER_PAIR(RUN,RUN):
+            run_container_union_inplace(CAST_run(dst), const_CAST_run(src));
+            return convert_run_to_efficient_container(CAST_run(dst),
+                                                      result_type);
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            // the bitset destination is also the output of the union
+            array_bitset_container_union(const_CAST_array(src),
+                                         const_CAST_bitset(dst),
+                                         CAST_bitset(dst));
+            *result_type = BITSET_CONTAINER_TYPE;  // never array
+            return dst;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            // c1 is an array, so no in-place possible
+            fresh = bitset_container_create();
+            *result_type = BITSET_CONTAINER_TYPE;
+            array_bitset_container_union(const_CAST_array(dst),
+                                         const_CAST_bitset(src),
+                                         CAST_bitset(fresh));
+            return fresh;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_container_is_full(const_CAST_run(src))) {
+                // a full run operand: return a copy of it
+                fresh = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(src), CAST_run(fresh));
+                return fresh;
+            }
+            run_bitset_container_union(const_CAST_run(src),
+                                       const_CAST_bitset(dst),
+                                       CAST_bitset(dst));  // allowed
+            *result_type = BITSET_CONTAINER_TYPE;
+            return dst;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_container_is_full(const_CAST_run(dst))) {
+                // the destination is already full: return it untouched
+                *result_type = RUN_CONTAINER_TYPE;
+                return dst;
+            }
+            fresh = bitset_container_create();
+            run_bitset_container_union(const_CAST_run(dst),
+                                       const_CAST_bitset(src),
+                                       CAST_bitset(fresh));
+            *result_type = BITSET_CONTAINER_TYPE;
+            return fresh;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            fresh = run_container_create();
+            array_run_container_union(const_CAST_array(dst),
+                                      const_CAST_run(src),
+                                      CAST_run(fresh));
+            return convert_run_to_efficient_container_and_free(
+                CAST_run(fresh), result_type);
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            array_run_container_inplace_union(const_CAST_array(src),
+                                              CAST_run(dst));
+            return convert_run_to_efficient_container(CAST_run(dst),
+                                                      result_type);
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;
+    }
+}
+
+/**
+ * Union of two containers, with the result placed in the first container.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change; the type of the returned
+ * container is stored in *result_type. Returns the modified (and possibly
+ * new) container.
+ *
+ * This lazy version delays some operations such as the maintenance of the
+ * cardinality. The generated containers require repair later on.
+ *
+ * Unlike the second operand, c1 is not unwrapped here: it is asserted not to
+ * be a shared container.
+ */
+static inline container_t *container_lazy_ior(
+    container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    assert(type1 != SHARED_CONTAINER_TYPE);
+    // c1 = get_writable_copy_if_shared(c1,&type1);
+    container_t *dst = c1;
+    const container_t *src = container_unwrap_shared(c2, &type2);
+    container_t *fresh = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        case CONTAINER_PAIR(BITSET,BITSET):
+#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL
+            // if we have two bitsets, we might as well compute the cardinality
+            bitset_container_or(const_CAST_bitset(dst),
+                                const_CAST_bitset(src),
+                                CAST_bitset(dst));
+            // it is possible that two bitsets can lead to a full container
+            if (CAST_bitset(dst)->cardinality == (1 << 16)) {  // we convert
+                fresh = run_container_create_range(0, (1 << 16));
+                *result_type = RUN_CONTAINER_TYPE;
+                return fresh;
+            }
+#else
+            bitset_container_or_nocard(const_CAST_bitset(dst),
+                                       const_CAST_bitset(src),
+                                       CAST_bitset(dst));
+
+#endif
+            *result_type = BITSET_CONTAINER_TYPE;
+            return dst;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY): {
+            const bool became_bitset =
+                array_array_container_lazy_inplace_union(
+                    CAST_array(dst), const_CAST_array(src), &fresh);
+            if (became_bitset) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            if (!became_bitset && (fresh == NULL)) {
+                return dst;  // the computation was done in-place!
+            }
+            return fresh;
+        }
+
+        case CONTAINER_PAIR(RUN,RUN):
+            run_container_union_inplace(CAST_run(dst), const_CAST_run(src));
+            *result_type = RUN_CONTAINER_TYPE;
+            return convert_run_to_efficient_container(CAST_run(dst),
+                                                      result_type);
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            // the bitset destination is also the output of the union
+            array_bitset_container_lazy_union(const_CAST_array(src),
+                                              const_CAST_bitset(dst),
+                                              CAST_bitset(dst));  // is lazy
+            *result_type = BITSET_CONTAINER_TYPE;  // never array
+            return dst;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            // c1 is an array, so no in-place possible
+            fresh = bitset_container_create();
+            *result_type = BITSET_CONTAINER_TYPE;
+            array_bitset_container_lazy_union(const_CAST_array(dst),
+                                              const_CAST_bitset(src),
+                                              CAST_bitset(fresh));  // is lazy
+            return fresh;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_container_is_full(const_CAST_run(src))) {
+                // a full run operand: return a copy of it
+                fresh = run_container_create();
+                *result_type = RUN_CONTAINER_TYPE;
+                run_container_copy(const_CAST_run(src), CAST_run(fresh));
+                return fresh;
+            }
+            run_bitset_container_lazy_union(
+                const_CAST_run(src), const_CAST_bitset(dst),
+                CAST_bitset(dst));  // allowed // lazy
+            *result_type = BITSET_CONTAINER_TYPE;
+            return dst;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_container_is_full(const_CAST_run(dst))) {
+                // the destination is already full: return it untouched
+                *result_type = RUN_CONTAINER_TYPE;
+                return dst;
+            }
+            fresh = bitset_container_create();
+            run_bitset_container_lazy_union(
+                const_CAST_run(dst), const_CAST_bitset(src),
+                CAST_bitset(fresh));  // lazy
+            *result_type = BITSET_CONTAINER_TYPE;
+            return fresh;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            fresh = run_container_create();
+            array_run_container_union(const_CAST_array(dst),
+                                      const_CAST_run(src),
+                                      CAST_run(fresh));
+            *result_type = RUN_CONTAINER_TYPE;
+            // conversion to an efficient container is skipped since we are
+            // lazy:
+            // result = convert_run_to_efficient_container_and_free(result,
+            // result_type);
+            return fresh;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            array_run_container_inplace_union(const_CAST_array(src),
+                                              CAST_run(dst));
+            *result_type = RUN_CONTAINER_TYPE;
+            // conversion to an efficient container is skipped since we are
+            // lazy:
+            // result = convert_run_to_efficient_container_and_free(result,
+            // result_type);
+            return dst;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;
+    }
+}
+
+/**
+ * Symmetric difference (xor) of two containers, returned as a new container
+ * whose type is stored in *result_type. Requires the typecode of each
+ * operand. This allocates new memory; the caller is responsible for
+ * deallocation.
+ */
+static inline container_t* container_xor(
+    const container_t *c1, uint8_t type1,
+    const container_t *c2, uint8_t type2,
+    uint8_t *result_type
+){
+    const container_t *lhs = container_unwrap_shared(c1, &type1);
+    const container_t *rhs = container_unwrap_shared(c2, &type2);
+    container_t *out = NULL;
+
+    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+        /* helpers whose boolean result selects bitset vs array output */
+        case CONTAINER_PAIR(BITSET,BITSET):
+            if (bitset_bitset_container_xor(
+                    const_CAST_bitset(lhs), const_CAST_bitset(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,ARRAY):
+            if (array_array_container_xor(
+                    const_CAST_array(lhs), const_CAST_array(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(BITSET,ARRAY):
+            if (array_bitset_container_xor(
+                    const_CAST_array(rhs), const_CAST_bitset(lhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(ARRAY,BITSET):
+            if (array_bitset_container_xor(
+                    const_CAST_array(lhs), const_CAST_bitset(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(BITSET,RUN):
+            if (run_bitset_container_xor(
+                    const_CAST_run(rhs), const_CAST_bitset(lhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        case CONTAINER_PAIR(RUN,BITSET):
+            if (run_bitset_container_xor(
+                    const_CAST_run(lhs), const_CAST_bitset(rhs), &out)) {
+                *result_type = BITSET_CONTAINER_TYPE;
+            } else {
+                *result_type = ARRAY_CONTAINER_TYPE;
+            }
+            break;
+
+        /* helpers whose return value is stored directly as the typecode */
+        case CONTAINER_PAIR(RUN,RUN):
+            *result_type = run_run_container_xor(const_CAST_run(lhs),
+                                                 const_CAST_run(rhs), &out);
+            break;
+
+        case CONTAINER_PAIR(ARRAY,RUN):
+            *result_type = array_run_container_xor(const_CAST_array(lhs),
+                                                   const_CAST_run(rhs), &out);
+            break;
+
+        case CONTAINER_PAIR(RUN,ARRAY):
+            *result_type = array_run_container_xor(const_CAST_array(rhs),
+                                                   const_CAST_run(lhs), &out);
+            break;
+
+        default:
+            assert(false);
+            __builtin_unreachable();
+            return NULL;  // unreached
+    }
+
+    return out;
+}
+
+ /* Shifts the values of the non-empty container 'c' by 'offset'.
+  *
+  * The shifted values are written to new containers handed back through 'lo'
+  * and 'hi': 'lo' receives the low half of the result (same key as the
+  * original) and 'hi' the high half (values belonging to the following key).
+  * Either one of 'lo' and 'hi' is allowed to be 'NULL', but not both.
+  * A non-'NULL' 'lo' or 'hi' must point to a 'NULL' container on entry.
+  * When one of them is 'NULL', the shifted elements for that half are not
+  * computed.
+  * If a resulting half turns out to be empty, the pointed-to container stays
+  * 'NULL'.
+  */
+ static inline void container_add_offset(const container_t *c, uint8_t type,
+                                         container_t **lo, container_t **hi,
+                                         uint16_t offset) {
+     // preconditions; enforced only when assertions are compiled in
+     assert(offset != 0);
+     assert(container_nonzero_cardinality(c, type));
+     assert(lo != NULL || hi != NULL);
+     assert(lo == NULL || *lo == NULL);
+     assert(hi == NULL || *hi == NULL);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         bitset_container_offset(const_CAST_bitset(c), lo, hi, offset);
+     } else if (type == ARRAY_CONTAINER_TYPE) {
+         array_container_offset(const_CAST_array(c), lo, hi, offset);
+     } else if (type == RUN_CONTAINER_TYPE) {
+         run_container_offset(const_CAST_run(c), lo, hi, offset);
+     } else {
+         assert(false);
+         __builtin_unreachable();
+     }
+ }
+
+/**
+ * Compute xor between two containers, generate a new container (having type
+ * result_type), requires a typecode. This allocates new memory, caller
+ * is responsible for deallocation.
+ *
+ * This lazy version delays some operations such as the maintenance of the
+ * cardinality. It requires repair later on the generated containers.
+ */
+static inline container_t *container_lazy_xor(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type
+){
+ c1 = container_unwrap_shared(c1, &type1);
+ c2 = container_unwrap_shared(c2, &type2);
+ container_t *result = NULL;
+ switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ case CONTAINER_PAIR(BITSET,BITSET):
+ result = bitset_container_create();
+ bitset_container_xor_nocard(
+ const_CAST_bitset(c1), const_CAST_bitset(c2),
+ CAST_bitset(result)); // is lazy
+ *result_type = BITSET_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,ARRAY):
+ *result_type = array_array_container_lazy_xor(
+ const_CAST_array(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,RUN):
+ // nothing special done yet.
+ *result_type =
+ run_run_container_xor(const_CAST_run(c1),
+ const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(BITSET,ARRAY):
+ result = bitset_container_create();
+ *result_type = BITSET_CONTAINER_TYPE;
+ array_bitset_container_lazy_xor(const_CAST_array(c2),
+ const_CAST_bitset(c1),
+ CAST_bitset(result));
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,BITSET):
+ result = bitset_container_create();
+ *result_type = BITSET_CONTAINER_TYPE;
+ array_bitset_container_lazy_xor(const_CAST_array(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result));
+ return result;
+
+ case CONTAINER_PAIR(BITSET,RUN):
+ result = bitset_container_create();
+ run_bitset_container_lazy_xor(const_CAST_run(c2),
+ const_CAST_bitset(c1),
+ CAST_bitset(result));
+ *result_type = BITSET_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,BITSET):
+ result = bitset_container_create();
+ run_bitset_container_lazy_xor(const_CAST_run(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(result));
+ *result_type = BITSET_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,RUN):
+ result = run_container_create();
+ array_run_container_lazy_xor(const_CAST_array(c1),
+ const_CAST_run(c2),
+ CAST_run(result));
+ *result_type = RUN_CONTAINER_TYPE;
+ // next line skipped since we are lazy
+ // result = convert_run_to_efficient_container(result, result_type);
+ return result;
+
+ case CONTAINER_PAIR(RUN,ARRAY):
+ result = run_container_create();
+ array_run_container_lazy_xor(const_CAST_array(c2),
+ const_CAST_run(c1),
+ CAST_run(result));
+ *result_type = RUN_CONTAINER_TYPE;
+ // next line skipped since we are lazy
+ // result = convert_run_to_efficient_container(result, result_type);
+ return result;
+
+ default:
+ assert(false);
+ __builtin_unreachable();
+ return NULL; // unreached
+ }
+}
+
+/**
+ * Compute the xor between two containers, with result in the first container.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change. Returns the modified
+ * (and possibly new) container
+*/
+static inline container_t *container_ixor(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type
+){
+ c1 = get_writable_copy_if_shared(c1, &type1);
+ c2 = container_unwrap_shared(c2, &type2);
+ container_t *result = NULL;
+ switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ case CONTAINER_PAIR(BITSET,BITSET):
+ *result_type = bitset_bitset_container_ixor(
+ CAST_bitset(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,ARRAY):
+ *result_type = array_array_container_ixor(
+ CAST_array(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,RUN):
+ *result_type = run_run_container_ixor(
+ CAST_run(c1), const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(BITSET,ARRAY):
+ *result_type = bitset_array_container_ixor(
+ CAST_bitset(c1), const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,BITSET):
+ *result_type = array_bitset_container_ixor(
+ CAST_array(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(BITSET,RUN):
+ *result_type =
+ bitset_run_container_ixor(
+ CAST_bitset(c1), const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+
+ return result;
+
+ case CONTAINER_PAIR(RUN,BITSET):
+ *result_type = run_bitset_container_ixor(
+ CAST_run(c1), const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,RUN):
+ *result_type = array_run_container_ixor(
+ CAST_array(c1), const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(RUN,ARRAY):
+ *result_type = run_array_container_ixor(
+ CAST_run(c1), const_CAST_array(c2), &result);
+ return result;
+
+ default:
+ assert(false);
+ __builtin_unreachable();
+ return NULL;
+ }
+}
+
+/**
+ * Compute the xor between two containers, with result in the first container.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change. Returns the modified
+ * (and possibly new) container
+ *
+ * This lazy version delays some operations such as the maintenance of the
+ * cardinality. It requires repair later on the generated containers.
+*/
+static inline container_t *container_lazy_ixor(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type
+){
+ assert(type1 != SHARED_CONTAINER_TYPE);
+ // c1 = get_writable_copy_if_shared(c1,&type1);
+ c2 = container_unwrap_shared(c2, &type2);
+ switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ case CONTAINER_PAIR(BITSET,BITSET):
+ bitset_container_xor_nocard(CAST_bitset(c1),
+ const_CAST_bitset(c2),
+ CAST_bitset(c1)); // is lazy
+ *result_type = BITSET_CONTAINER_TYPE;
+ return c1;
+
+ // TODO: other cases being lazy, esp. when we know inplace not likely
+ // could see the corresponding code for union
+ default:
+ // we may have a dirty bitset (without a precomputed cardinality)
+ // and calling container_ixor on it might be unsafe.
+ if (type1 == BITSET_CONTAINER_TYPE) {
+ bitset_container_t *bc = CAST_bitset(c1);
+ if (bc->cardinality == BITSET_UNKNOWN_CARDINALITY) {
+ bc->cardinality = bitset_container_compute_cardinality(bc);
+ }
+ }
+ return container_ixor(c1, type1, c2, type2, result_type);
+ }
+}
+
+/**
+ * Compute difference (andnot) between two containers, generate a new
+ * container (having type result_type), requires a typecode. This allocates new
+ * memory, caller is responsible for deallocation.
+ */
+static inline container_t *container_andnot(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type
+){
+ c1 = container_unwrap_shared(c1, &type1);
+ c2 = container_unwrap_shared(c2, &type2);
+ container_t *result = NULL;
+ switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ case CONTAINER_PAIR(BITSET,BITSET):
+ *result_type = bitset_bitset_container_andnot(
+ const_CAST_bitset(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,ARRAY):
+ result = array_container_create();
+ array_array_container_andnot(const_CAST_array(c1),
+ const_CAST_array(c2),
+ CAST_array(result));
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,RUN):
+ if (run_container_is_full(const_CAST_run(c2))) {
+ result = array_container_create();
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+ }
+ *result_type =
+ run_run_container_andnot(const_CAST_run(c1),
+ const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(BITSET,ARRAY):
+ *result_type = bitset_array_container_andnot(
+ const_CAST_bitset(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,BITSET):
+ result = array_container_create();
+ array_bitset_container_andnot(const_CAST_array(c1),
+ const_CAST_bitset(c2),
+ CAST_array(result));
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(BITSET,RUN):
+ if (run_container_is_full(const_CAST_run(c2))) {
+ result = array_container_create();
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+ }
+ *result_type = bitset_run_container_andnot(
+ const_CAST_bitset(c1),
+ const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,BITSET):
+ *result_type = run_bitset_container_andnot(
+ const_CAST_run(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,RUN):
+ if (run_container_is_full(const_CAST_run(c2))) {
+ result = array_container_create();
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+ }
+ result = array_container_create();
+ array_run_container_andnot(const_CAST_array(c1),
+ const_CAST_run(c2),
+ CAST_array(result));
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,ARRAY):
+ *result_type = run_array_container_andnot(
+ const_CAST_run(c1), const_CAST_array(c2),
+ &result);
+ return result;
+
+ default:
+ assert(false);
+ __builtin_unreachable();
+ return NULL; // unreached
+ }
+}
+
+/**
+ * Compute the andnot between two containers, with result in the first
+ * container.
+ * If the returned pointer is identical to c1, then the container has been
+ * modified.
+ * If the returned pointer is different from c1, then a new container has been
+ * created and the caller is responsible for freeing it.
+ * The type of the first container may change. Returns the modified
+ * (and possibly new) container
+*/
+static inline container_t *container_iandnot(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type
+){
+ c1 = get_writable_copy_if_shared(c1, &type1);
+ c2 = container_unwrap_shared(c2, &type2);
+ container_t *result = NULL;
+ switch (PAIR_CONTAINER_TYPES(type1, type2)) {
+ case CONTAINER_PAIR(BITSET,BITSET):
+ *result_type = bitset_bitset_container_iandnot(
+ CAST_bitset(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,ARRAY):
+ array_array_container_iandnot(CAST_array(c1),
+ const_CAST_array(c2));
+ *result_type = ARRAY_CONTAINER_TYPE;
+ return c1;
+
+ case CONTAINER_PAIR(RUN,RUN):
+ *result_type = run_run_container_iandnot(
+ CAST_run(c1), const_CAST_run(c2), &result);
+ return result;
+
+ case CONTAINER_PAIR(BITSET,ARRAY):
+ *result_type = bitset_array_container_iandnot(
+ CAST_bitset(c1),
+ const_CAST_array(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,BITSET):
+ *result_type = ARRAY_CONTAINER_TYPE;
+ array_bitset_container_iandnot(CAST_array(c1),
+ const_CAST_bitset(c2));
+ return c1;
+
+ case CONTAINER_PAIR(BITSET,RUN):
+ *result_type = bitset_run_container_iandnot(
+ CAST_bitset(c1),
+ const_CAST_run(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(RUN,BITSET):
+ *result_type = run_bitset_container_iandnot(
+ CAST_run(c1),
+ const_CAST_bitset(c2), &result)
+ ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE;
+ return result;
+
+ case CONTAINER_PAIR(ARRAY,RUN):
+ *result_type = ARRAY_CONTAINER_TYPE;
+ array_run_container_iandnot(CAST_array(c1),
+ const_CAST_run(c2));
+ return c1;
+
+ case CONTAINER_PAIR(RUN,ARRAY):
+ *result_type = run_array_container_iandnot(
+ CAST_run(c1), const_CAST_array(c2), &result);
+ return result;
+
+ default:
+ assert(false);
+ __builtin_unreachable();
+ return NULL;
+ }
+}
+
+/**
+ * Visit all values x of the container once, passing (base+x,ptr)
+ * to iterator. You need to specify a container and its type.
+ * Returns true if the iteration should continue.
+ */
+static inline bool container_iterate(
+ const container_t *c, uint8_t type,
+ uint32_t base,
+ roaring_iterator iterator, void *ptr
+){
+ c = container_unwrap_shared(c, &type);
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_iterate(const_CAST_bitset(c),
+ base, iterator, ptr);
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_iterate(const_CAST_array(c),
+ base, iterator, ptr);
+ case RUN_CONTAINER_TYPE:
+ return run_container_iterate(const_CAST_run(c),
+ base, iterator, ptr);
+ default:
+ assert(false);
+ __builtin_unreachable();
+ }
+ assert(false);
+ __builtin_unreachable();
+ return false;
+}
+
+ /**
+  * Variant of container_iterate taking a roaring_iterator64 callback:
+  * unwraps a shared container, then forwards `base`, `iterator`, `high_bits`
+  * and `ptr` unchanged to the iterate64 helper matching the typecode, and
+  * returns that helper's result.
+  */
+ static inline bool container_iterate64(
+     const container_t *c, uint8_t type,
+     uint32_t base,
+     roaring_iterator64 iterator,
+     uint64_t high_bits, void *ptr
+ ){
+     c = container_unwrap_shared(c, &type);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         return bitset_container_iterate64(const_CAST_bitset(c), base,
+                                           iterator, high_bits, ptr);
+     }
+     if (type == ARRAY_CONTAINER_TYPE) {
+         return array_container_iterate64(const_CAST_array(c), base,
+                                          iterator, high_bits, ptr);
+     }
+     if (type == RUN_CONTAINER_TYPE) {
+         return run_container_iterate64(const_CAST_run(c), base,
+                                        iterator, high_bits, ptr);
+     }
+
+     // no other typecode is expected here
+     assert(false);
+     __builtin_unreachable();
+     return false;
+ }
+
+ /**
+  * Negation of a container, dispatched on its typecode after unwrapping a
+  * shared container. Returns the container produced by the type-specific
+  * negation helper and writes its typecode to *result_type.
+  */
+ static inline container_t *container_not(
+     const container_t *c, uint8_t type,
+     uint8_t *result_type
+ ){
+     container_t *answer = NULL;
+
+     c = container_unwrap_shared(c, &type);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         // helper returns true when the result is a bitset, else an array
+         const bool is_bitset =
+             bitset_container_negation(const_CAST_bitset(c), &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == ARRAY_CONTAINER_TYPE) {
+         // an array is always negated into a freshly created bitset
+         answer = bitset_container_create();
+         *result_type = BITSET_CONTAINER_TYPE;
+         array_container_negation(const_CAST_array(c),
+                                  CAST_bitset(answer));
+         return answer;
+     }
+     if (type == RUN_CONTAINER_TYPE) {
+         // helper returns the typecode of the result
+         *result_type = run_container_negation(const_CAST_run(c), &answer);
+         return answer;
+     }
+
+     assert(false);
+     __builtin_unreachable();
+     return NULL;
+ }
+
+ /**
+  * Negation of a container restricted to a range, dispatched on its typecode
+  * after unwrapping a shared container. `range_start` and `range_end` are
+  * forwarded unchanged to the type-specific *_negation_range helper.
+  * Returns the container produced by that helper and writes its typecode to
+  * *result_type.
+  */
+ static inline container_t *container_not_range(
+     const container_t *c, uint8_t type,
+     uint32_t range_start, uint32_t range_end,
+     uint8_t *result_type
+ ){
+     container_t *answer = NULL;
+
+     c = container_unwrap_shared(c, &type);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         // helper returns true when the result is a bitset, else an array
+         const bool is_bitset = bitset_container_negation_range(
+             const_CAST_bitset(c), range_start, range_end, &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == ARRAY_CONTAINER_TYPE) {
+         const bool is_bitset = array_container_negation_range(
+             const_CAST_array(c), range_start, range_end, &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == RUN_CONTAINER_TYPE) {
+         // helper returns the typecode of the result
+         *result_type = run_container_negation_range(
+             const_CAST_run(c), range_start, range_end, &answer);
+         return answer;
+     }
+
+     assert(false);
+     __builtin_unreachable();
+     return NULL;
+ }
+
+ /**
+  * In-place flavour of container_not. The input is first replaced by a
+  * writable copy if it is shared, then handed to the type-specific in-place
+  * negation helper. Returns the resulting container and writes its typecode
+  * to *result_type.
+  *
+  * For an array input the result is always a newly created bitset and the
+  * array container is freed here.
+  */
+ static inline container_t *container_inot(
+     container_t *c, uint8_t type,
+     uint8_t *result_type
+ ){
+     container_t *answer = NULL;
+
+     c = get_writable_copy_if_shared(c, &type);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         // helper returns true when the result is a bitset, else an array
+         const bool is_bitset =
+             bitset_container_negation_inplace(CAST_bitset(c), &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == ARRAY_CONTAINER_TYPE) {
+         // will never be inplace
+         answer = bitset_container_create();
+         *result_type = BITSET_CONTAINER_TYPE;
+         array_container_negation(CAST_array(c),
+                                  CAST_bitset(answer));
+         array_container_free(CAST_array(c));
+         return answer;
+     }
+     if (type == RUN_CONTAINER_TYPE) {
+         // helper returns the typecode of the result
+         *result_type =
+             run_container_negation_inplace(CAST_run(c), &answer);
+         return answer;
+     }
+
+     assert(false);
+     __builtin_unreachable();
+     return NULL;
+ }
+
+ /**
+  * In-place flavour of container_not_range. The input is first replaced by a
+  * writable copy if it is shared; `range_start` and `range_end` are forwarded
+  * unchanged to the type-specific *_negation_range_inplace helper.
+  * Returns the container produced by that helper and writes its typecode to
+  * *result_type.
+  */
+ static inline container_t *container_inot_range(
+     container_t *c, uint8_t type,
+     uint32_t range_start, uint32_t range_end,
+     uint8_t *result_type
+ ){
+     container_t *answer = NULL;
+
+     c = get_writable_copy_if_shared(c, &type);
+
+     if (type == BITSET_CONTAINER_TYPE) {
+         // helper returns true when the result is a bitset, else an array
+         const bool is_bitset = bitset_container_negation_range_inplace(
+             CAST_bitset(c), range_start, range_end, &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == ARRAY_CONTAINER_TYPE) {
+         const bool is_bitset = array_container_negation_range_inplace(
+             CAST_array(c), range_start, range_end, &answer);
+         *result_type = is_bitset ? BITSET_CONTAINER_TYPE
+                                  : ARRAY_CONTAINER_TYPE;
+         return answer;
+     }
+     if (type == RUN_CONTAINER_TYPE) {
+         // helper returns the typecode of the result
+         *result_type = run_container_negation_range_inplace(
+             CAST_run(c), range_start, range_end, &answer);
+         return answer;
+     }
+
+     assert(false);
+     __builtin_unreachable();
+     return NULL;
+ }
+
+/**
+ * If the element of given rank is in this container, supposing that
+ * the first
+ * element has rank start_rank, then the function returns true and
+ * sets element
+ * accordingly.
+ * Otherwise, it returns false and update start_rank.
+ */
+static inline bool container_select(
+ const container_t *c, uint8_t type,
+ uint32_t *start_rank, uint32_t rank,
+ uint32_t *element
+){
+ c = container_unwrap_shared(c, &type);
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_select(const_CAST_bitset(c),
+ start_rank, rank, element);
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_select(const_CAST_array(c),
+ start_rank, rank, element);
+ case RUN_CONTAINER_TYPE:
+ return run_container_select(const_CAST_run(c),
+ start_rank, rank, element);
+ default:
+ assert(false);
+ __builtin_unreachable();
+ }
+ assert(false);
+ __builtin_unreachable();
+ return false;
+}
+
+ /**
+  * Returns the maximum of the container, as reported by the type-specific
+  * *_maximum helper. A shared container is unwrapped first.
+  */
+ static inline uint16_t container_maximum(
+     const container_t *c, uint8_t type
+ ){
+     c = container_unwrap_shared(c, &type);
+     switch (type) {
+         case BITSET_CONTAINER_TYPE:
+             return bitset_container_maximum(const_CAST_bitset(c));
+         case ARRAY_CONTAINER_TYPE:
+             return array_container_maximum(const_CAST_array(c));
+         case RUN_CONTAINER_TYPE:
+             return run_container_maximum(const_CAST_run(c));
+         default:
+             assert(false);
+             __builtin_unreachable();
+     }
+     assert(false);
+     __builtin_unreachable();
+     // unreached; the return type is uint16_t, so use 0 rather than the
+     // boolean literal `false`
+     return 0;
+ }
+
+ /**
+  * Returns the minimum of the container, as reported by the type-specific
+  * *_minimum helper. A shared container is unwrapped first.
+  */
+ static inline uint16_t container_minimum(
+     const container_t *c, uint8_t type
+ ){
+     c = container_unwrap_shared(c, &type);
+     switch (type) {
+         case BITSET_CONTAINER_TYPE:
+             return bitset_container_minimum(const_CAST_bitset(c));
+         case ARRAY_CONTAINER_TYPE:
+             return array_container_minimum(const_CAST_array(c));
+         case RUN_CONTAINER_TYPE:
+             return run_container_minimum(const_CAST_run(c));
+         default:
+             assert(false);
+             __builtin_unreachable();
+     }
+     assert(false);
+     __builtin_unreachable();
+     // unreached; the return type is uint16_t, so use 0 rather than the
+     // boolean literal `false`
+     return 0;
+ }
+
+ /**
+  * Number of values smaller or equal to x in the container, as computed by
+  * the type-specific *_rank helper. A shared container is unwrapped first.
+  */
+ static inline int container_rank(
+     const container_t *c, uint8_t type,
+     uint16_t x
+ ){
+     c = container_unwrap_shared(c, &type);
+     switch (type) {
+         case BITSET_CONTAINER_TYPE:
+             return bitset_container_rank(const_CAST_bitset(c), x);
+         case ARRAY_CONTAINER_TYPE:
+             return array_container_rank(const_CAST_array(c), x);
+         case RUN_CONTAINER_TYPE:
+             return run_container_rank(const_CAST_run(c), x);
+         default:
+             assert(false);
+             __builtin_unreachable();
+     }
+     assert(false);
+     __builtin_unreachable();
+     // unreached; the return type is int, so use 0 rather than the boolean
+     // literal `false`
+     return 0;
+ }
+
+/**
+ * Add all values in range [min, max] to a given container.
+ *
+ * If the returned pointer is different from $container, then a new container
+ * has been created and the caller is responsible for freeing it.
+ * The type of the first container may change. Returns the modified
+ * (and possibly new) container.
+ */
+static inline container_t *container_add_range(
+ container_t *c, uint8_t type,
+ uint32_t min, uint32_t max,
+ uint8_t *result_type
+){
+ // NB: when selecting new container type, we perform only inexpensive checks
+ switch (type) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_container_t *bitset = CAST_bitset(c);
+
+ int32_t union_cardinality = 0;
+ union_cardinality += bitset->cardinality;
+ union_cardinality += max - min + 1;
+ union_cardinality -= bitset_lenrange_cardinality(bitset->words,
+ min, max-min);
+
+ if (union_cardinality == INT32_C(0x10000)) {
+ *result_type = RUN_CONTAINER_TYPE;
+ return run_container_create_range(0, INT32_C(0x10000));
+ } else {
+ *result_type = BITSET_CONTAINER_TYPE;
+ bitset_set_lenrange(bitset->words, min, max - min);
+ bitset->cardinality = union_cardinality;
+ return bitset;
+ }
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ array_container_t *array = CAST_array(c);
+
+ int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
+ int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
+ int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
+
+ if (union_cardinality == INT32_C(0x10000)) {
+ *result_type = RUN_CONTAINER_TYPE;
+ return run_container_create_range(0, INT32_C(0x10000));
+ } else if (union_cardinality <= DEFAULT_MAX_SIZE) {
+ *result_type = ARRAY_CONTAINER_TYPE;
+ array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
+ return array;
+ } else {
+ *result_type = BITSET_CONTAINER_TYPE;
+ bitset_container_t *bitset = bitset_container_from_array(array);
+ bitset_set_lenrange(bitset->words, min, max - min);
+ bitset->cardinality = union_cardinality;
+ return bitset;
+ }
+ }
+ case RUN_CONTAINER_TYPE: {
+ run_container_t *run = CAST_run(c);
+
+ int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
+ int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
+
+ int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t);
+ int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+
+ if (run_size_bytes <= bitset_size_bytes) {
+ run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+ *result_type = RUN_CONTAINER_TYPE;
+ return run;
+ } else {
+ return container_from_run_range(run, min, max, result_type);
+ }
+ }
+ default:
+ __builtin_unreachable();
+ }
+}
+
+/*
+ * Removes all elements in range [min, max].
+ * Returns one of:
+ * - NULL if no elements left
+ * - pointer to the original container
+ * - pointer to a newly-allocated container (if it is more efficient)
+ *
+ * If the returned pointer is different from $container, then a new container
+ * has been created and the caller is responsible for freeing the original container.
+ */
+static inline container_t *container_remove_range(
+ container_t *c, uint8_t type,
+ uint32_t min, uint32_t max,
+ uint8_t *result_type
+){
+ switch (type) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_container_t *bitset = CAST_bitset(c);
+
+ int32_t result_cardinality = bitset->cardinality -
+ bitset_lenrange_cardinality(bitset->words, min, max-min);
+
+ if (result_cardinality == 0) {
+ return NULL;
+ } else if (result_cardinality <= DEFAULT_MAX_SIZE) {
+ *result_type = ARRAY_CONTAINER_TYPE;
+ bitset_reset_range(bitset->words, min, max+1);
+ bitset->cardinality = result_cardinality;
+ return array_container_from_bitset(bitset);
+ } else {
+ *result_type = BITSET_CONTAINER_TYPE;
+ bitset_reset_range(bitset->words, min, max+1);
+ bitset->cardinality = result_cardinality;
+ return bitset;
+ }
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ array_container_t *array = CAST_array(c);
+
+ int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
+ int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
+ int32_t result_cardinality = nvals_less + nvals_greater;
+
+ if (result_cardinality == 0) {
+ return NULL;
+ } else {
+ *result_type = ARRAY_CONTAINER_TYPE;
+ array_container_remove_range(array, nvals_less,
+ array->cardinality - result_cardinality);
+ return array;
+ }
+ }
+ case RUN_CONTAINER_TYPE: {
+ run_container_t *run = CAST_run(c);
+
+ if (run->n_runs == 0) {
+ return NULL;
+ }
+ if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) {
+ return NULL;
+ }
+
+ run_container_remove_range(run, min, max);
+ return convert_run_to_efficient_container(run, result_type);
+ }
+ default:
+ __builtin_unreachable();
+ }
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif
+/* end file include/roaring/containers/containers.h */
+/* begin file include/roaring/roaring_array.h */
+#ifndef INCLUDE_ROARING_ARRAY_H
+#define INCLUDE_ROARING_ARRAY_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring {
+
+// Note: in pure C++ code, you should avoid putting `using` in header files
+using api::roaring_array_t;
+
+namespace internal {
+#endif
+
+ enum {  // NOTE(review): names suggest magic numbers of the serialized formats - confirm against the (de)serializers
+ SERIAL_COOKIE_NO_RUNCONTAINER = 12346,  // presumably the cookie written when no run container is present - TODO confirm
+ SERIAL_COOKIE = 12347,  // presumably the cookie written when run containers may be present - TODO confirm
+ FROZEN_COOKIE = 13766,  // presumably the cookie of the "frozen" format - TODO confirm
+ NO_OFFSET_THRESHOLD = 4  // presumably a container-count threshold for the offset header - TODO confirm
+ };
+
+/**
+ * Create a new roaring array
+ */
+roaring_array_t *ra_create(void);
+
+/**
+ * Initialize an existing roaring array with the specified capacity (in number
+ * of containers)
+ */
+bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap);
+
+/**
+ * Initialize with zero capacity
+ */
+void ra_init(roaring_array_t *t);
+
+/**
+ * Copies this roaring array, we assume that dest is not initialized
+ */
+bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
+ bool copy_on_write);
+
+/*
+ * Shrinks the capacity, returns the number of bytes saved.
+ */
+int ra_shrink_to_fit(roaring_array_t *ra);
+
+/**
+ * Copies this roaring array, we assume that dest is initialized
+ */
+bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
+ bool copy_on_write);
+
+/**
+ * Frees the memory used by a roaring array
+ */
+void ra_clear(roaring_array_t *r);
+
+/**
+ * Frees the memory used by a roaring array, but does not free the containers
+ */
+void ra_clear_without_containers(roaring_array_t *r);
+
+/**
+ * Frees just the containers
+ */
+void ra_clear_containers(roaring_array_t *ra);
+
+/**
+ * Get the index corresponding to a 16-bit key
+ */
+inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
+ if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1;
+ return binarySearch(ra->keys, (int32_t)ra->size, x);
+}
+
+/**
+ * Retrieves the container at index i, filling in the typecode
+ */
+inline container_t *ra_get_container_at_index(
+ const roaring_array_t *ra, uint16_t i, uint8_t *typecode
+){
+ *typecode = ra->typecodes[i];
+ return ra->containers[i];
+}
+
+/**
+ * Retrieves the key at index i
+ */
+inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
+ return ra->keys[i];
+}
+
+/**
+ * Add a new key-value pair at index i
+ */
+void ra_insert_new_key_value_at(
+ roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode);
+
+/**
+ * Append a new key-value pair
+ */
+void ra_append(
+ roaring_array_t *ra, uint16_t key,
+ container_t *c, uint8_t typecode);
+
+/**
+ * Append a new key-value pair to ra, cloning (in COW sense) a value from sa
+ * at index index
+ */
+void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t index, bool copy_on_write);
+
+/**
+ * Append new key-value pairs to ra, cloning (in COW sense) values from sa
+ * at indexes
+ * [start_index, end_index)
+ */
+void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
+ int32_t start_index, int32_t end_index,
+ bool copy_on_write);
+
+ /** appends from sa to ra, ending with the greatest key that is
+ * less than or equal to stopping_key
+ */
+void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t stopping_key, bool copy_on_write);
+
+ /** appends from sa to ra, starting with the smallest key that is
+ * strictly greater than before_start
+ */
+
+void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
+ uint16_t before_start, bool copy_on_write);
+
+/**
+ * Move the key-value pairs to ra from sa at indexes
+ * [start_index, end_index), old array should not be freed
+ * (use ra_clear_without_containers)
+ **/
+void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
+ int32_t start_index, int32_t end_index);
+/**
+ * Append new key-value pairs to ra, from sa at indexes
+ * [start_index, end_index)
+ */
+void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
+ int32_t start_index, int32_t end_index,
+ bool copy_on_write);
+
+/**
+ * Set the container at the corresponding index using the specified
+ * typecode.
+ */
+inline void ra_set_container_at_index(
+ const roaring_array_t *ra, int32_t i,
+ container_t *c, uint8_t typecode
+){
+ assert(i < ra->size);
+ ra->containers[i] = c;
+ ra->typecodes[i] = typecode;
+}
+
+/**
+ * If needed, increase the capacity of the array so that it can fit k values
+ * (at
+ * least);
+ */
+bool extend_array(roaring_array_t *ra, int32_t k);
+
+/**
+ * Returns the current number of entries of ra (its size field).
+ */
+inline int32_t ra_get_size(const roaring_array_t *ra) {
+    return ra->size;
+}
+
+/**
+ * Thin wrapper around advanceUntil(): searches the key array of ra for x,
+ * starting from position pos and bounded by the current size of ra.
+ * The value returned by advanceUntil() is passed through unchanged.
+ */
+static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
+                                       int32_t pos) {
+    const int32_t length = ra->size;
+    return advanceUntil(ra->keys, pos, length, x);
+}
+
+int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);
+
+void ra_downsize(roaring_array_t *ra, int32_t new_length);
+
+/**
+ * Overwrite the entry at index i: stores key in ra->keys[i], c in
+ * ra->containers[i] and typecode in ra->typecodes[i].
+ *
+ * The pointer previously stored at i is simply overwritten: nothing is
+ * freed here.
+ *
+ * i must satisfy 0 <= i < ra->size (checked with assert in debug builds).
+ */
+inline void ra_replace_key_and_container_at_index(
+    roaring_array_t *ra, int32_t i, uint16_t key,
+    container_t *c, uint8_t typecode
+){
+    // i is signed: reject negative indices as well as indices past the end
+    assert(i >= 0);
+    assert(i < ra->size);
+
+    ra->keys[i] = key;
+    ra->containers[i] = c;
+    ra->typecodes[i] = typecode;
+}
+
+// write set bits to an array
+void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
+
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans);
+
+/**
+ * write a bitmap to a buffer. This is meant to be compatible with
+ * the
+ * Java and Go versions. Return the size in bytes of the serialized
+ * output (which should be ra_portable_size_in_bytes(ra)).
+ */
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
+
+/**
+ * read a bitmap from a serialized version. This is meant to be compatible
+ * with the Java and Go versions.
+ * maxbytes indicates how many bytes available from buf.
+ * When the function returns true, roaring_array_t is populated with the data
+ * and *readbytes indicates how many bytes were read. In all cases, if the function
+ * returns true, then maxbytes >= *readbytes.
+ */
+bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes);
+
+/**
+ * Quickly checks whether there is a serialized bitmap at the pointer,
+ * not exceeding size "maxbytes" in bytes. This function does not allocate
+ * memory dynamically.
+ *
+ * This function returns 0 if and only if no valid bitmap is found.
+ * Otherwise, it returns how many bytes are occupied by the bitmap data.
+ */
+size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes);
+
+/**
+ * How many bytes are required to serialize this bitmap (meant to be
+ * compatible
+ * with Java and Go versions)
+ */
+size_t ra_portable_size_in_bytes(const roaring_array_t *ra);
+
+/**
+ * return true if it contains at least one run container.
+ */
+bool ra_has_run_container(const roaring_array_t *ra);
+
+/**
+ * Size of the header when serializing (meant to be compatible
+ * with Java and Go versions)
+ */
+uint32_t ra_portable_header_size(const roaring_array_t *ra);
+
+/**
+ * If the container at index i is shared, unshare it (creating a local
+ * copy if needed).
+ *
+ * The slot is replaced by whatever get_writable_copy_if_shared() returns;
+ * that call receives a pointer to the slot's typecode and may update it.
+ * i must be smaller than ra->size (checked with assert in debug builds).
+ */
+static inline void ra_unshare_container_at_index(roaring_array_t *ra,
+                                                 uint16_t i) {
+    assert(i < ra->size);
+    container_t *writable =
+        get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]);
+    ra->containers[i] = writable;
+}
+
+/**
+ * remove at index i, sliding over all entries after i
+ */
+void ra_remove_at_index(roaring_array_t *ra, int32_t i);
+
+
+/**
+* clears all containers, sets the size at 0 and shrinks the memory usage.
+*/
+void ra_reset(roaring_array_t *ra);
+
+/**
+ * remove at index i, sliding over all entries after i. Free removed container.
+ */
+void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i);
+
+/**
+ * remove a chunk of indices, sliding over entries after it
+ */
+// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end);
+
+// used in inplace andNot only, to slide left the containers from
+// the mutated RoaringBitmap that are after the largest container of
+// the argument RoaringBitmap. It is followed by a call to resize.
+//
+void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
+ uint32_t new_begin);
+
+/**
+ * Shifts rightmost $count containers to the left (distance < 0) or
+ * to the right (distance > 0).
+ * Allocates memory if necessary.
+ * This function doesn't free or create new containers.
+ * Caller is responsible for that.
+ */
+void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
+
+#ifdef __cplusplus
+} // namespace internal
+} } // extern "C" { namespace roaring {
+#endif
+
+#endif
+/* end file include/roaring/roaring_array.h */
+/* begin file src/array_util.c */
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
+ uint16_t ikey);
+
+#ifdef CROARING_IS_X64
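+// Table of 256 byte-shuffle masks (16 bytes each), indexed by an 8-bit lane
+// mask r: entry r moves the 16-bit lanes whose bit is set in r to the front,
+// in order, and 0xFF zeroes the remaining bytes.
+// Used by intersect_vector16 and difference_vector16.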
+ALIGNED(0x1000)
+static const uint8_t shuffle_mask16[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
+ 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
+ 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
+ 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
+ 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
+ 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
+ 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
+ 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
+ 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
+ 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
+ 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
+ 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
+ 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
+ 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
+ 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13,
+ 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
+ 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
+ 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
+ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15};
+
+/**
+ * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
+ * Optimized by D. Lemire on May 3rd 2013
+ *
+ * Writes to C the values present in both A (s_a elements) and B (s_b
+ * elements) and returns how many values were written.
+ * The block-skipping logic compares the last element of each 8-value block,
+ * so A and B are expected to be sorted in increasing order.
+ * NOTE(review): every vector step stores a full 16 bytes at &C[count], so C
+ * presumably needs slack beyond the final count -- confirm against callers.
+ */
+CROARING_TARGET_AVX2
+int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b,
+ uint16_t *C) {
+ size_t count = 0;  // number of values written to C so far
+ size_t i_a = 0, i_b = 0;
+ const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);  // 8 lanes of 16 bits
+ const size_t st_a = (s_a / vectorlength) * vectorlength;  // s_a rounded down to whole vectors
+ const size_t st_b = (s_b / vectorlength) * vectorlength;  // s_b rounded down to whole vectors
+ __m128i v_a, v_b;
+ if ((i_a < st_a) && (i_b < st_b)) {  // vectorized path: needs one full vector on each side
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ while ((A[i_a] == 0) || (B[i_b] == 0)) {  // a block starts with 0: the implicit-length compare below would treat 0 as a terminator
+ const __m128i res_v = _mm_cmpestrm(
+ v_b, vectorlength, v_a, vectorlength,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);  // explicit lengths: all 8 lanes are compared
+ const int r = _mm_extract_epi32(res_v, 0);  // bit i set when lane i of v_a occurs in v_b
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);  // 16-byte table entry number r
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);  // matched lanes packed to the front
+ _mm_storeu_si128((__m128i *)&C[count], p); // can overflow: stores 16 bytes even if fewer lanes matched
+ count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {  // current block of A is exhausted
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {  // current block of B is exhausted
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ if ((i_a < st_a) && (i_b < st_b))  // full vectors remain on both sides
+ while (true) {
+ const __m128i res_v = _mm_cmpistrm(
+ v_b, v_a,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);  // implicit-length compare
+ const int r = _mm_extract_epi32(res_v, 0);  // bit i set when lane i of v_a occurs in v_b
+ __m128i sm16 =
+ _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);  // matched lanes packed to the front
+ _mm_storeu_si128((__m128i *)&C[count], p); // can overflow: stores 16 bytes even if fewer lanes matched
+ count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {  // current block of A is exhausted
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {  // current block of B is exhausted
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ }
+ // intersect the tail using scalar intersection
+ while (i_a < s_a && i_b < s_b) {
+ uint16_t a = A[i_a];
+ uint16_t b = B[i_b];
+ if (a < b) {
+ i_a++;
+ } else if (b < a) {
+ i_b++;
+ } else {
+ C[count] = a; //==b;
+ count++;
+ i_a++;
+ i_b++;
+ }
+ }
+ return (int32_t)count;
+}
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2 /*
+ * intersect_vector16_cardinality: returns the number of values present in
+ * both A (s_a elements) and B (s_b elements). Nothing is written out; only
+ * the matches are counted.
+ * The block-skipping logic compares the last element of each 8-value block,
+ * so A and B are expected to be sorted in increasing order.
+ */
+int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
+ size_t s_a,
+ const uint16_t *__restrict__ B,
+ size_t s_b) {
+ size_t count = 0;  // number of common values found so far
+ size_t i_a = 0, i_b = 0;
+ const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);  // 8 lanes of 16 bits
+ const size_t st_a = (s_a / vectorlength) * vectorlength;  // s_a rounded down to whole vectors
+ const size_t st_b = (s_b / vectorlength) * vectorlength;  // s_b rounded down to whole vectors
+ __m128i v_a, v_b;
+ if ((i_a < st_a) && (i_b < st_b)) {  // vectorized path: needs one full vector on each side
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ while ((A[i_a] == 0) || (B[i_b] == 0)) {  // a block starts with 0: the implicit-length compare below would treat 0 as a terminator
+ const __m128i res_v = _mm_cmpestrm(
+ v_b, vectorlength, v_a, vectorlength,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);  // explicit lengths: all 8 lanes are compared
+ const int r = _mm_extract_epi32(res_v, 0);  // bit i set when lane i of v_a occurs in v_b
+ count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {  // current block of A is exhausted
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {  // current block of B is exhausted
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ if ((i_a < st_a) && (i_b < st_b))  // full vectors remain on both sides
+ while (true) {
+ const __m128i res_v = _mm_cmpistrm(
+ v_b, v_a,
+ _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);  // implicit-length compare
+ const int r = _mm_extract_epi32(res_v, 0);  // bit i set when lane i of v_a occurs in v_b
+ count += _mm_popcnt_u32(r);
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {  // current block of A is exhausted
+ i_a += vectorlength;
+ if (i_a == st_a) break;
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {  // current block of B is exhausted
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ }
+ // intersect the tail using scalar intersection
+ while (i_a < s_a && i_b < s_b) {
+ uint16_t a = A[i_a];
+ uint16_t b = B[i_b];
+ if (a < b) {
+ i_a++;
+ } else if (b < a) {
+ i_b++;
+ } else {
+ count++;
+ i_a++;
+ i_b++;
+ }
+ }
+ return (int32_t)count;
+}
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2 /*
+ * difference_vector16: writes to C the values of A (s_a elements) that do
+ * not occur in B (s_b elements) and returns how many values were written.
+ * The block-skipping logic compares the last element of each 8-value block,
+ * so A and B are expected to be sorted in increasing order.
+ * NOTE(review): every vector step stores a full 16 bytes at &C[count], so C
+ * presumably needs slack beyond the final count -- confirm against callers.
+ */
+/////////
+// Warning:
+// This function may not be safe if A == C or B == C.
+/////////
+int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
+ const uint16_t *__restrict__ B, size_t s_b,
+ uint16_t *C) {
+ // we handle the degenerate case
+ if (s_a == 0) return 0;
+ if (s_b == 0) {
+ if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a);  // nothing to remove: A is the answer
+ return (int32_t)s_a;
+ }
+ // handle the leading zeroes, it is messy but it allows us to use the fast
+ // _mm_cmpistrm intrinsic safely
+ int32_t count = 0;  // number of values written to C so far
+ if ((A[0] == 0) || (B[0] == 0)) {
+ if ((A[0] == 0) && (B[0] == 0)) {  // 0 is in both: drop it from both inputs
+ A++;
+ s_a--;
+ B++;
+ s_b--;
+ } else if (A[0] == 0) {  // 0 is only in A: it belongs to the difference
+ C[count++] = 0;
+ A++;
+ s_a--;
+ } else {  // 0 is only in B: skip it
+ B++;
+ s_b--;
+ }
+ }
+ // at this point, we have two non-empty arrays, made of non-zero
+ // increasing values.
+ size_t i_a = 0, i_b = 0;
+ const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t);  // 8 lanes of 16 bits
+ const size_t st_a = (s_a / vectorlength) * vectorlength;  // s_a rounded down to whole vectors
+ const size_t st_b = (s_b / vectorlength) * vectorlength;  // s_b rounded down to whole vectors
+ if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path
+ __m128i v_a, v_b; //, v_bmax;
+ // we load a vector from A and a vector from B
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ // we have a runningmask which indicates which values from A have been
+ // spotted in B, these don't get written out.
+ __m128i runningmask_a_found_in_b = _mm_setzero_si128();
+ /****
+ * start of the main vectorized loop
+ *****/
+ while (true) {
+ // a_found_in_b will contain a mask indicating, for each entry
+ // of v_a, whether it is seen
+ // in v_b
+ const __m128i a_found_in_b =
+ _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
+ _SIDD_BIT_MASK);
+ runningmask_a_found_in_b =
+ _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
+ // we always compare the last values of A and B
+ const uint16_t a_max = A[i_a + vectorlength - 1];
+ const uint16_t b_max = B[i_b + vectorlength - 1];
+ if (a_max <= b_max) {
+ // Ok. In this code path, we are ready to write our v_a
+ // because there is no need to read more from B, they will
+ // all be large values.
+ const int bitmask_belongs_to_difference =
+ _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;  // flip the low 8 bits: lanes of v_a NOT found in B
+ /*** next few lines are probably expensive *****/
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
+ bitmask_belongs_to_difference);
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);  // surviving lanes packed to the front
+ _mm_storeu_si128((__m128i *)&C[count], p); // can overflow: stores 16 bytes even if fewer lanes survive
+ count += _mm_popcnt_u32(bitmask_belongs_to_difference);
+ // we advance a
+ i_a += vectorlength;
+ if (i_a == st_a) // no more
+ break;
+ runningmask_a_found_in_b = _mm_setzero_si128();  // fresh mask for the next block of A
+ v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+ }
+ if (b_max <= a_max) {
+ // in this code path, the current v_b has become useless
+ i_b += vectorlength;
+ if (i_b == st_b) break;
+ v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+ }
+ }
+ // at this point, either we have i_a == st_a, which is the end of the
+ // vectorized processing,
+ // or we have i_b == st_b, and we are not done processing the vector...
+ // so we need to finish it off.
+ if (i_a < st_a) { // we have unfinished business...
+ uint16_t buffer[8]; // buffer to do a masked load
+ memset(buffer, 0, 8 * sizeof(uint16_t));  // zero padding ends the implicit-length compare below
+ memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));  // the leftover elements of B
+ v_b = _mm_lddqu_si128((__m128i *)buffer);
+ const __m128i a_found_in_b =
+ _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
+ _SIDD_BIT_MASK);
+ runningmask_a_found_in_b =
+ _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
+ const int bitmask_belongs_to_difference =
+ _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;  // flip the low 8 bits: lanes of v_a NOT found in B
+ __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
+ bitmask_belongs_to_difference);
+ __m128i p = _mm_shuffle_epi8(v_a, sm16);  // surviving lanes packed to the front
+ _mm_storeu_si128((__m128i *)&C[count], p); // can overflow: stores 16 bytes even if fewer lanes survive
+ count += _mm_popcnt_u32(bitmask_belongs_to_difference);
+ i_a += vectorlength;
+ }
+ // at this point we should have i_a == st_a and i_b == st_b
+ }
+ // do the tail using scalar code
+ while (i_a < s_a && i_b < s_b) {
+ uint16_t a = A[i_a];
+ uint16_t b = B[i_b];
+ if (b < a) {
+ i_b++;
+ } else if (a < b) {  // a cannot be in B: keep it
+ C[count] = a;
+ count++;
+ i_a++;
+ } else { //==
+ i_a++;
+ i_b++;
+ }
+ }
+ if (i_a < s_a) {  // B is exhausted: the rest of A belongs to the difference
+ if(C == A) {
+ assert((size_t)count <= i_a);
+ if((size_t)count < i_a) {
+ memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));  // regions may overlap, hence memmove
+ }
+ } else {
+ for(size_t i = 0; i < (s_a - i_a); i++) {
+ C[count + i] = A[i + i_a];
+ }
+ }
+ count += (int32_t)(s_a - i_a);
+ }
+ return count;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_IS_X64
+
+
+
+/**
+ * Branchless binary search going after 4 values at once in the same array.
+ * Assumes that array is sorted in ascending order.
+ * On return array[*index1] >= target1, array[*index2] >= target2, ...
+ * except when *index1 == n, in which case all values in array are smaller
+ * than target1, and so forth.
+ * When n <= 0 the array is treated as empty and every index is set to 0, so
+ * the outputs are always written.
+ * It has logarithmic complexity.
+ */
+static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
+                          uint16_t target2, uint16_t target3, uint16_t target4,
+                          int32_t *index1, int32_t *index2, int32_t *index3,
+                          int32_t *index4) {
+    const uint16_t *base1 = array;
+    const uint16_t *base2 = array;
+    const uint16_t *base3 = array;
+    const uint16_t *base4 = array;
+    if (n <= 0) {
+        // Nothing to search: report "not found" (index == 0 == length)
+        // instead of leaving the caller's variables untouched.
+        *index1 = 0;
+        *index2 = 0;
+        *index3 = 0;
+        *index4 = 0;
+        return;
+    }
+    // Each round halves the window; the ternaries keep the four searches
+    // free of data-dependent branches.
+    while (n > 1) {
+        int32_t half = n >> 1;
+        base1 = (base1[half] < target1) ? &base1[half] : base1;
+        base2 = (base2[half] < target2) ? &base2[half] : base2;
+        base3 = (base3[half] < target3) ? &base3[half] : base3;
+        base4 = (base4[half] < target4) ? &base4[half] : base4;
+        n -= half;
+    }
+    // baseK now points at the last candidate; step one past it when it is
+    // still smaller than the target.
+    *index1 = (int32_t)((*base1 < target1) + base1 - array);
+    *index2 = (int32_t)((*base2 < target2) + base2 - array);
+    *index3 = (int32_t)((*base3 < target3) + base3 - array);
+    *index4 = (int32_t)((*base4 < target4) + base4 - array);
+}
+
+/**
+ * Branchless binary search going after 2 values at once in the same array.
+ * Assumes that array is sorted in ascending order.
+ * On return array[*index1] >= target1 and array[*index2] >= target2,
+ * except when *index1 == n, in which case all values in array are smaller
+ * than target1, and so forth.
+ * When n <= 0 the array is treated as empty and both indexes are set to 0,
+ * so the outputs are always written.
+ * It has logarithmic complexity.
+ */
+static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
+                          uint16_t target2, int32_t *index1, int32_t *index2) {
+    const uint16_t *base1 = array;
+    const uint16_t *base2 = array;
+    if (n <= 0) {
+        // Nothing to search: report "not found" (index == 0 == length)
+        // instead of leaving the caller's variables untouched.
+        *index1 = 0;
+        *index2 = 0;
+        return;
+    }
+    // Each round halves the window without data-dependent branches.
+    while (n > 1) {
+        int32_t half = n >> 1;
+        base1 = (base1[half] < target1) ? &base1[half] : base1;
+        base2 = (base2[half] < target2) ? &base2[half] : base2;
+        n -= half;
+    }
+    // Step one past the last candidate when it is still below the target.
+    *index1 = (int32_t)((*base1 < target1) + base1 - array);
+    *index2 = (int32_t)((*base2 < target2) + base2 - array);
+}
+
+/* Intersects a small set of uint16_t with a large one.
+ * The common values are written to buffer and their count is returned.
+ * The small set is consumed four values at a time through binarySearch4,
+ * then two at a time through binarySearch2, and a last leftover value goes
+ * through binarySearch. This approach can be slightly superior to a
+ * conventional galloping search in some instances.
+ */
+int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
+                                const uint16_t *large, size_t size_l,
+                                uint16_t *buffer) {
+    size_t out = 0, off_l = 0, off_s = 0;
+    int32_t hit1 = 0, hit2 = 0, hit3 = 0, hit4 = 0;
+
+    if (size_s == 0) {
+        return 0;
+    }
+
+    // Groups of four probes. Each search starts at off_l, which is moved up
+    // to the position found for the largest probe of the previous group.
+    for (; (off_s + 4 <= size_s) && (off_l < size_l); off_s += 4) {
+        const uint16_t *window = large + off_l;
+        const uint16_t t1 = small[off_s];
+        const uint16_t t2 = small[off_s + 1];
+        const uint16_t t3 = small[off_s + 2];
+        const uint16_t t4 = small[off_s + 3];
+        binarySearch4(window, (int32_t)(size_l - off_l), t1, t2, t3, t4,
+                      &hit1, &hit2, &hit3, &hit4);
+        // A hit position equal to the window length means "not present".
+        if ((hit1 + off_l < size_l) && (window[hit1] == t1)) {
+            buffer[out++] = t1;
+        }
+        if ((hit2 + off_l < size_l) && (window[hit2] == t2)) {
+            buffer[out++] = t2;
+        }
+        if ((hit3 + off_l < size_l) && (window[hit3] == t3)) {
+            buffer[out++] = t3;
+        }
+        if ((hit4 + off_l < size_l) && (window[hit4] == t4)) {
+            buffer[out++] = t4;
+        }
+        off_l += hit4;
+    }
+
+    // At most one pair of probes remains.
+    if ((off_s + 2 <= size_s) && (off_l < size_l)) {
+        const uint16_t *window = large + off_l;
+        const uint16_t t1 = small[off_s];
+        const uint16_t t2 = small[off_s + 1];
+        binarySearch2(window, (int32_t)(size_l - off_l), t1, t2, &hit1,
+                      &hit2);
+        if ((hit1 + off_l < size_l) && (window[hit1] == t1)) {
+            buffer[out++] = t1;
+        }
+        if ((hit2 + off_l < size_l) && (window[hit2] == t2)) {
+            buffer[out++] = t2;
+        }
+        off_s += 2;
+        off_l += hit2;
+    }
+
+    // At most one single probe remains; a non-negative result from
+    // binarySearch is taken as "found".
+    if ((off_s < size_s) && (off_l < size_l)) {
+        const uint16_t last = small[off_s];
+        int32_t where =
+            binarySearch(large + off_l, (int32_t)(size_l - off_l), last);
+        if (where >= 0) {
+            buffer[out++] = last;
+        }
+    }
+    return (int32_t)out;
+}
+
+
+
+// Counts the values shared by a small and a large set of uint16_t without
+// storing them. Returns 0 when either set is empty.
+// The large set is skipped through with advanceUntil (defined elsewhere);
+// the loop relies on it returning size_l once no candidate is left.
+// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
+                                            size_t size_s,
+                                            const uint16_t *large,
+                                            size_t size_l) {
+    size_t pos = 0, idx_l = 0, idx_s = 0;
+
+    // The first element of both sets is read right below, so an empty
+    // large set must be rejected as well as an empty small one.
+    if (0 == size_s || 0 == size_l) {
+        return 0;
+    }
+
+    uint16_t val_l = large[idx_l], val_s = small[idx_s];
+
+    while (true) {
+        if (val_l < val_s) {
+            // Large side is behind: jump forward to a candidate for val_s.
+            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
+            if (idx_l == size_l) break;
+            val_l = large[idx_l];
+        } else if (val_s < val_l) {
+            // Small side is behind: step it by one.
+            idx_s++;
+            if (idx_s == size_s) break;
+            val_s = small[idx_s];
+        } else {
+            // Match: count it, then move both sides on.
+            pos++;
+            idx_s++;
+            if (idx_s == size_s) break;
+            val_s = small[idx_s];
+            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
+            if (idx_l == size_l) break;
+            val_l = large[idx_l];
+        }
+    }
+
+    return (int32_t)pos;
+}
+
+// Tells whether a small and a large set of uint16_t share at least one
+// value. Returns false when either set is empty.
+// The large set is skipped through with advanceUntil (defined elsewhere);
+// the loop relies on it returning size_l once no candidate is left.
+bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
+                                      const uint16_t *large, size_t size_l) {
+    size_t idx_l = 0, idx_s = 0;
+
+    // The first element of both sets is read right below, so an empty
+    // large set must be rejected as well as an empty small one.
+    if (0 == size_s || 0 == size_l) {
+        return false;
+    }
+
+    uint16_t val_l = large[idx_l], val_s = small[idx_s];
+
+    while (true) {
+        if (val_l < val_s) {
+            // Large side is behind: jump forward to a candidate for val_s.
+            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
+            if (idx_l == size_l) break;
+            val_l = large[idx_l];
+        } else if (val_s < val_l) {
+            // Small side is behind: step it by one.
+            idx_s++;
+            if (idx_s == size_s) break;
+            val_s = small[idx_s];
+        } else {
+            // First common value found, no need to look further.
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Generic intersection function.
+ * Walks A and B in lockstep, always advancing the side holding the smaller
+ * value, and copies every value found on both sides to out.
+ * Returns the number of values written.
+ */
+int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
+                         const uint16_t *B, const size_t lenB, uint16_t *out) {
+    if (lenA == 0 || lenB == 0) return 0;
+
+    const uint16_t *const stopA = A + lenA;
+    const uint16_t *const stopB = B + lenB;
+    uint16_t *cursor = out;
+
+    while (A != stopA && B != stopB) {
+        const uint16_t a = *A;
+        const uint16_t b = *B;
+        if (a < b) {
+            ++A;
+        } else if (b < a) {
+            ++B;
+        } else {
+            *cursor++ = a;
+            ++A;
+            ++B;
+        }
+    }
+    return (int32_t)(cursor - out);
+}
+
+/**
+ * Counts the values present in both A and B without storing them.
+ * Walks both arrays in lockstep, always advancing the side holding the
+ * smaller value.
+ */
+int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
+                                     const uint16_t *B, const size_t lenB) {
+    if (lenA == 0 || lenB == 0) return 0;
+
+    const uint16_t *const stopA = A + lenA;
+    const uint16_t *const stopB = B + lenB;
+    int32_t matches = 0;
+
+    while (A != stopA && B != stopB) {
+        const uint16_t a = *A;
+        const uint16_t b = *B;
+        if (a < b) {
+            ++A;
+        } else if (b < a) {
+            ++B;
+        } else {
+            ++matches;
+            ++A;
+            ++B;
+        }
+    }
+    return matches;
+}
+
+
+/**
+ * Tells whether A and B share at least one value.
+ * Walks both arrays in lockstep, always advancing the side holding the
+ * smaller value, and stops at the first value found on both sides.
+ */
+bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
+                               const uint16_t *B, const size_t lenB) {
+    if (lenA == 0 || lenB == 0) return false;
+
+    const uint16_t *const stopA = A + lenA;
+    const uint16_t *const stopB = B + lenB;
+
+    while (A != stopA && B != stopB) {
+        const uint16_t a = *A;
+        const uint16_t b = *B;
+        if (a < b) {
+            ++A;
+        } else if (b < a) {
+            ++B;
+        } else {
+            return true;
+        }
+    }
+    return false;
+}
+
+
+
+/**
+ * Generic intersection function.
+ * Walks A and B in lockstep, always advancing the side holding the smaller
+ * value, and copies every value found on both sides to out.
+ * Returns the number of values written.
+ */
+size_t intersection_uint32(const uint32_t *A, const size_t lenA,
+                           const uint32_t *B, const size_t lenB,
+                           uint32_t *out) {
+    if (lenA == 0 || lenB == 0) return 0;
+
+    const uint32_t *const stopA = A + lenA;
+    const uint32_t *const stopB = B + lenB;
+    uint32_t *cursor = out;
+
+    while (A != stopA && B != stopB) {
+        const uint32_t a = *A;
+        const uint32_t b = *B;
+        if (a < b) {
+            ++A;
+        } else if (b < a) {
+            ++B;
+        } else {
+            *cursor++ = a;
+            ++A;
+            ++B;
+        }
+    }
+    return (size_t)(cursor - out);
+}
+
+/**
+ * Counts the values present in both A and B without storing them.
+ * Walks both arrays in lockstep, always advancing the side holding the
+ * smaller value.
+ */
+size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
+                                const uint32_t *B, const size_t lenB) {
+    if (lenA == 0 || lenB == 0) return 0;
+
+    const uint32_t *const stopA = A + lenA;
+    const uint32_t *const stopB = B + lenB;
+    size_t matches = 0;
+
+    while (A != stopA && B != stopB) {
+        const uint32_t a = *A;
+        const uint32_t b = *B;
+        if (a < b) {
+            ++A;
+        } else if (b < a) {
+            ++B;
+        } else {
+            ++matches;
+            ++A;
+            ++B;
+        }
+    }
+    return matches;
+}
+
+// can one vectorize the computation of the union? (Update: Yes! See
+// union_vector16).
+
+/**
+ * Merges set_1 and set_2 into buffer, writing a value present in both only
+ * once, and returns the number of values written.
+ * The current head of each input is kept in a local, and whatever is left of
+ * one input once the other runs out is copied in bulk with memmove.
+ */
+size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+                    size_t size_2, uint16_t *buffer) {
+    // One side empty: the union is a plain copy of the other side.
+    if (size_2 == 0) {
+        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
+        return size_1;
+    }
+    if (size_1 == 0) {
+        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
+        return size_2;
+    }
+
+    size_t written = 0, i1 = 0, i2 = 0;
+    uint16_t head_1 = set_1[0];
+    uint16_t head_2 = set_2[0];
+
+    for (;;) {
+        if (head_1 < head_2) {
+            buffer[written++] = head_1;
+            if (++i1 >= size_1) break;
+            head_1 = set_1[i1];
+        } else if (head_2 < head_1) {
+            buffer[written++] = head_2;
+            if (++i2 >= size_2) break;
+            head_2 = set_2[i2];
+        } else {
+            // Same value on both sides: emit once, consume both.
+            buffer[written++] = head_1;
+            ++i1;
+            ++i2;
+            if (i1 >= size_1 || i2 >= size_2) break;
+            head_1 = set_1[i1];
+            head_2 = set_2[i2];
+        }
+    }
+
+    // The loop only stops once one input is used up, so at most one of the
+    // two branches below has anything to copy.
+    if (i1 < size_1) {
+        const size_t rest = size_1 - i1;
+        memmove(buffer + written, set_1 + i1, rest * sizeof(uint16_t));
+        written += rest;
+    } else if (i2 < size_2) {
+        const size_t rest = size_2 - i2;
+        memmove(buffer + written, set_2 + i2, rest * sizeof(uint16_t));
+        written += rest;
+    }
+
+    return written;
+}
+
+/**
+ * Writes to a_out the values of a1 that do not appear in a2 and returns how
+ * many were written.
+ * The current head of each input is kept in a local; once a2 runs out the
+ * rest of a1 is copied in bulk.
+ */
+int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
+                      int length2, uint16_t *a_out) {
+    if (length1 == 0) return 0;
+    if (length2 == 0) {
+        // Nothing to remove; skip the copy when the output is the input.
+        if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1);
+        return length1;
+    }
+
+    int kept = 0;
+    int i1 = 0, i2 = 0;
+    uint16_t v1 = a1[0];
+    uint16_t v2 = a2[0];
+
+    for (;;) {
+        if (v1 < v2) {
+            // v1 cannot be in a2: keep it.
+            a_out[kept++] = v1;
+            if (++i1 >= length1) break;
+            v1 = a1[i1];
+            continue;
+        }
+
+        // From here v1 >= v2, so a2 moves forward; on equality a1 does too
+        // and the shared value is dropped.
+        const bool matched = (v1 == v2);
+        if (matched && ++i1 >= length1) break;
+        if (++i2 >= length2) {
+            // a2 is used up: everything left in a1 survives.
+            memmove(a_out + kept, a1 + i1, sizeof(uint16_t) * (length1 - i1));
+            return kept + length1 - i1;
+        }
+        if (matched) v1 = a1[i1];
+        v2 = a2[i2];
+    }
+    return kept;
+}
+
+/**
+ * Writes to out the values found in exactly one of array_1 and array_2
+ * (symmetric difference) and returns how many were written.
+ * Values found in both inputs are dropped; whatever is left of one input
+ * once the other runs out is copied in bulk with memcpy.
+ */
+int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
+                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
+    int32_t i1 = 0, i2 = 0, n_out = 0;
+
+    while (i1 < card_1 && i2 < card_2) {
+        const uint16_t x = array_1[i1];
+        const uint16_t y = array_2[i2];
+        if (x < y) {
+            out[n_out++] = x;
+            ++i1;
+        } else if (y < x) {
+            out[n_out++] = y;
+            ++i2;
+        } else {
+            // Present on both sides: cancels out.
+            ++i1;
+            ++i2;
+        }
+    }
+
+    // The loop stops once one input is used up, so at most one tail exists.
+    if (i1 < card_1) {
+        const size_t rest = card_1 - i1;
+        memcpy(out + n_out, array_1 + i1, rest * sizeof(uint16_t));
+        n_out += (int32_t)rest;
+    } else if (i2 < card_2) {
+        const size_t rest = card_2 - i2;
+        memcpy(out + n_out, array_2 + i2, rest * sizeof(uint16_t));
+        n_out += (int32_t)rest;
+    }
+    return n_out;
+}
+
+#ifdef CROARING_IS_X64
+
+/***
+ * start of the SIMD 16-bit union code
+ *
+ */
+CROARING_TARGET_AVX2
+
+// Merges two registers of eight unsigned 16-bit lanes each. Assuming that
+// *vInput1 and *vInput2 are sorted, produces a sorted output going from
+// *vecMin (smaller half) all the way to *vecMax (larger half).
+// Developed originally for merge sort using SIMD instructions. Standard merge.
+// See, e.g., Inoue and Taura, SIMD- and Cache-Friendly Algorithm for Sorting an Array of Structures
+static inline void sse_merge(const __m128i *vInput1,
+ const __m128i *vInput2, // input 1 & 2
+ __m128i *vecMin, __m128i *vecMax) { // output
+ __m128i vecTmp;
+ vecTmp = _mm_min_epu16(*vInput1, *vInput2); // lane-wise unsigned minimum of the two inputs
+ *vecMax = _mm_max_epu16(*vInput1, *vInput2); // lane-wise unsigned maximum of the two inputs
+ vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); // rotate by 2 bytes, i.e. one 16-bit lane
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // compare-exchange, round 1 of 7
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); // rotate the current minima by one lane
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 2
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 3
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 4
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 5
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 6
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
+ *vecMin = _mm_min_epu16(vecTmp, *vecMax); // round 7
+ *vecMax = _mm_max_epu16(vecTmp, *vecMax);
+ *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); // eighth one-lane rotation: the minima have gone full circle
+}
+CROARING_UNTARGET_REGION
+// used by store_unique, generated by simdunion.py
+static uint8_t uniqshuf[] = {
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
+ 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
+ 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
+ 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
+ 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
+ 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
+ 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF};
+CROARING_TARGET_AVX2
+/**
+ * Stores the lanes of `newval` that differ from the value immediately before
+ * them, where the value before lane 0 is the last lane of `old` (the vector
+ * written previously).
+ *
+ * A full 128-bit vector (8 x uint16_t) is always written to `output`; only the
+ * first <return value> lanes are meaningful.
+ *
+ * Returns the number of lanes that were kept (0..8).
+ */
+static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
+    // Lane 0 = old[7], lane i = newval[i - 1]: each lane's predecessor.
+    const __m128i predecessor = _mm_alignr_epi8(newval, old, 16 - 2);
+    // 0xFFFF in every lane that repeats its predecessor.
+    const __m128i repeated = _mm_cmpeq_epi16(predecessor, newval);
+    // Narrow the eight 16-bit flags to bytes so movemask yields one bit each.
+    const __m128i narrowed = _mm_packs_epi16(repeated, _mm_setzero_si128());
+    const int M = _mm_movemask_epi8(narrowed);
+    // uniqshuf holds one 16-byte shuffle key per 8-bit mask value.
+    const __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
+    _mm_storeu_si128((__m128i *)output, _mm_shuffle_epi8(newval, key));
+    return 8 - _mm_popcnt_u32(M);
+}
+CROARING_UNTARGET_REGION
+
+/**
+ * Removes consecutive duplicates from out[0..len) in place, keeping the first
+ * element of every run of equal values.
+ *
+ * out: array to compact; on return its first <return value> entries hold the
+ *      surviving values in their original order.
+ * len: number of valid entries in out.
+ *
+ * Returns the number of entries kept. An empty input yields 0 and leaves out
+ * untouched (previously 1 was reported although nothing had been written).
+ */
+static inline uint32_t unique(uint16_t *out, uint32_t len) {
+    if (len == 0) {
+        return 0;
+    }
+    // out[0] always survives, so writing starts at index 1.
+    uint32_t pos = 1;
+    for (uint32_t i = 1; i < len; ++i) {
+        if (out[i] != out[i - 1]) {
+            out[pos++] = out[i];
+        }
+    }
+    return pos;
+}
+
+/**
+ * qsort() comparator for uint16_t elements.
+ *
+ * Returns the difference (*a - *b) computed after integer promotion, so the
+ * result is negative, zero or positive as *a is below, equal to or above *b.
+ */
+static int uint16_compare(const void *a, const void *b) {
+    const uint16_t lhs = *(const uint16_t *)a;
+    const uint16_t rhs = *(const uint16_t *)b;
+    return lhs - rhs;
+}
+
+CROARING_TARGET_AVX2
+// a one-pass SSE union algorithm
+// This function may not be safe if array1 == output or array2 == output.
+//
+// Merges array1 (length1 values) and array2 (length2 values) into output and
+// returns the resulting element count: the distance the vector phase advanced
+// output plus whatever the final union_uint16() call reports.
+// NOTE(review): both inputs are presumably sorted and duplicate-free, as the
+// block-wise merge below relies on ordering -- confirm against callers.
+// Each store_unique() call writes a whole 8-element vector at the current
+// output position even when fewer values are kept, so output presumably needs
+// room for that overshoot -- confirm against callers.
+uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+ const uint16_t *__restrict__ array2, uint32_t length2,
+ uint16_t *__restrict__ output) {
+ // the vector path needs at least one full 8-element block from each input;
+ // otherwise defer entirely to the scalar routine
+ if ((length1 < 8) || (length2 < 8)) {
+ return (uint32_t)union_uint16(array1, length1, array2, length2, output);
+ }
+ __m128i vA, vB, V, vecMin, vecMax;
+ __m128i laststore;
+ uint16_t *initoutput = output;
+ // len1/len2: number of complete 8-element blocks in each input;
+ // pos1/pos2: index of the next unread block
+ uint32_t len1 = length1 / 8;
+ uint32_t len2 = length2 / 8;
+ uint32_t pos1 = 0;
+ uint32_t pos2 = 0;
+ // we start the machine
+ vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
+ pos1++;
+ vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
+ pos2++;
+ // sse_merge is defined elsewhere; presumably it leaves the 8 smaller values
+ // in vecMin and the 8 larger ones in vecMax -- confirm
+ sse_merge(&vA, &vB, &vecMin, &vecMax);
+ // nothing has been stored yet: seed the "previous vector" with 0xFFFF lanes
+ laststore = _mm_set1_epi16(-1);
+ output += store_unique(laststore, vecMin, output);
+ laststore = vecMin;
+ if ((pos1 < len1) && (pos2 < len2)) {
+ // curA/curB: first value of the next unread block of each input
+ uint16_t curA, curB;
+ curA = array1[8 * pos1];
+ curB = array2[8 * pos2];
+ while (true) {
+ // consume the block whose first value is smaller (array1 on ties)
+ if (curA <= curB) {
+ V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
+ pos1++;
+ if (pos1 < len1) {
+ curA = array1[8 * pos1];
+ } else {
+ // array1 has no complete block left; V is merged after the loop
+ break;
+ }
+ } else {
+ V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
+ pos2++;
+ if (pos2 < len2) {
+ curB = array2[8 * pos2];
+ } else {
+ // array2 has no complete block left; V is merged after the loop
+ break;
+ }
+ }
+ sse_merge(&V, &vecMax, &vecMin, &vecMax);
+ output += store_unique(laststore, vecMin, output);
+ laststore = vecMin;
+ }
+ // merge the block that was loaded right before the break
+ sse_merge(&V, &vecMax, &vecMin, &vecMax);
+ output += store_unique(laststore, vecMin, output);
+ laststore = vecMin;
+ }
+ // we finish the rest off using a scalar algorithm
+ // could be improved?
+ //
+ // copy the small end on a tmp buffer
+ uint32_t len = (uint32_t)(output - initoutput);
+ // buffer receives at most 8 values from vecMax plus fewer than 8 tail
+ // values (length % 8) of the input whose blocks are exhausted
+ uint16_t buffer[16];
+ uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
+ if (pos1 == len1) {
+ // array1's blocks are all consumed: append its tail to the pending values
+ memcpy(buffer + leftoversize, array1 + 8 * pos1,
+ (length1 - 8 * len1) * sizeof(uint16_t));
+ leftoversize += length1 - 8 * len1;
+ // the concatenation is not necessarily ordered: sort, then drop repeats
+ qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
+
+ leftoversize = unique(buffer, leftoversize);
+ // scalar union of the pending values with everything unread in array2
+ len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
+ length2 - 8 * pos2, output);
+ } else {
+ // otherwise array2's blocks are the exhausted ones: same steps, roles swapped
+ memcpy(buffer + leftoversize, array2 + 8 * pos2,
+ (length2 - 8 * len2) * sizeof(uint16_t));
+ leftoversize += length2 - 8 * len2;
+ qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
+ leftoversize = unique(buffer, leftoversize);
+ len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
+ length1 - 8 * pos1, output);
+ }
+ return len;
+}
+CROARING_UNTARGET_REGION
+
+/**
+ * End of the SIMD 16-bit union code
+ *
+ */
+
+/**
+ * Start of SIMD 16-bit XOR code
+ */
+
+CROARING_TARGET_AVX2
+/**
+ * XOR counterpart of store_unique(). Looks at the stream formed by the last
+ * two lanes of `old` followed by `newval`, and stores the values
+ * old[7], newval[0..6] that are equal to neither of their two neighbours.
+ * The last lane of `newval` is never emitted here; it is only examined as the
+ * right-hand neighbour of newval[6].
+ *
+ * A full 128-bit vector (8 x uint16_t) is always written to `output`; only the
+ * first <return value> lanes are meaningful.
+ *
+ * Returns the number of lanes that were kept (0..8).
+ */
+static inline int store_unique_xor(__m128i old, __m128i newval,
+                                   uint16_t *output) {
+    // two_back:  old[6], old[7], newval[0..5]
+    // one_back:  old[7], newval[0..6]   (these are the output candidates)
+    const __m128i two_back = _mm_alignr_epi8(newval, old, 16 - 4);
+    const __m128i one_back = _mm_alignr_epi8(newval, old, 16 - 2);
+    // A candidate is dropped when it matches its left or its right neighbour.
+    const __m128i matches_left = _mm_cmpeq_epi16(one_back, two_back);
+    const __m128i matches_right = _mm_cmpeq_epi16(one_back, newval);
+    const __m128i dropped = _mm_or_si128(matches_left, matches_right);
+    // Narrow the eight 16-bit flags to bytes so movemask yields one bit each.
+    const int M =
+        _mm_movemask_epi8(_mm_packs_epi16(dropped, _mm_setzero_si128()));
+    // uniqshuf holds one 16-byte shuffle key per 8-bit mask value.
+    const __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
+    _mm_storeu_si128((__m128i *)output, _mm_shuffle_epi8(one_back, key));
+    return 8 - _mm_popcnt_u32(M);
+}
+CROARING_UNTARGET_REGION
+
+/**
+ * In-place XOR-style compaction of out[0..len): whenever an element equals the
+ * one right before it, the previously kept element is discarded instead of
+ * keeping either copy; every other element is appended to the kept prefix.
+ *
+ * Warning: assumes len > 0 (the kept count starts at 1 for out[0]).
+ *
+ * Returns the number of entries kept at the front of out.
+ */
+static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
+    uint32_t kept = 1;
+    uint32_t i = 1;
+    while (i < len) {
+        if (out[i] == out[i - 1]) {
+            // identical to its predecessor: retract the predecessor as well
+            kept--;
+        } else {
+            out[kept] = out[i];
+            kept++;
+        }
+        i++;
+    }
+    return kept;
+}
+CROARING_TARGET_AVX2
+// a one-pass SSE xor algorithm
+//
+// Merges array1 (length1 values) and array2 (length2 values) block by block
+// and writes to output the values that are not paired with an equal
+// neighbour in the merged stream; returns the resulting element count.
+// NOTE(review): presumably both inputs are sorted and duplicate-free, which
+// makes the result the symmetric difference -- confirm against callers.
+// Each store_unique_xor() call writes a whole 8-element vector at the current
+// output position even when fewer values are kept, so output presumably needs
+// room for that overshoot -- confirm against callers.
+uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
+ const uint16_t *__restrict__ array2, uint32_t length2,
+ uint16_t *__restrict__ output) {
+ // the vector path needs at least one full 8-element block from each input;
+ // otherwise defer entirely to the scalar routine
+ if ((length1 < 8) || (length2 < 8)) {
+ return xor_uint16(array1, length1, array2, length2, output);
+ }
+ __m128i vA, vB, V, vecMin, vecMax;
+ __m128i laststore;
+ uint16_t *initoutput = output;
+ // len1/len2: number of complete 8-element blocks in each input;
+ // pos1/pos2: index of the next unread block
+ uint32_t len1 = length1 / 8;
+ uint32_t len2 = length2 / 8;
+ uint32_t pos1 = 0;
+ uint32_t pos2 = 0;
+ // we start the machine
+ vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
+ pos1++;
+ vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
+ pos2++;
+ // sse_merge is defined elsewhere; presumably it leaves the 8 smaller values
+ // in vecMin and the 8 larger ones in vecMax -- confirm
+ sse_merge(&vA, &vB, &vecMin, &vecMax);
+ // nothing has been stored yet: seed the "previous vector" with 0xFFFF lanes
+ laststore = _mm_set1_epi16(-1);
+ // scratch for the scalar tail: up to 8 lanes from store_unique_xor, 1 for the
+ // last lane of vecMax, and fewer than 8 tail values (length % 8)
+ uint16_t buffer[17];
+ output += store_unique_xor(laststore, vecMin, output);
+
+ laststore = vecMin;
+ if ((pos1 < len1) && (pos2 < len2)) {
+ // curA/curB: first value of the next unread block of each input
+ uint16_t curA, curB;
+ curA = array1[8 * pos1];
+ curB = array2[8 * pos2];
+ while (true) {
+ // consume the block whose first value is smaller (array1 on ties)
+ if (curA <= curB) {
+ V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
+ pos1++;
+ if (pos1 < len1) {
+ curA = array1[8 * pos1];
+ } else {
+ // array1 has no complete block left; V is merged after the loop
+ break;
+ }
+ } else {
+ V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
+ pos2++;
+ if (pos2 < len2) {
+ curB = array2[8 * pos2];
+ } else {
+ // array2 has no complete block left; V is merged after the loop
+ break;
+ }
+ }
+ sse_merge(&V, &vecMax, &vecMin, &vecMax);
+ // conditionally stores the last value of laststore as well as all
+ // but the
+ // last value of vecMin
+ output += store_unique_xor(laststore, vecMin, output);
+ laststore = vecMin;
+ }
+ // merge the block that was loaded right before the break
+ sse_merge(&V, &vecMax, &vecMin, &vecMax);
+ // conditionally stores the last value of laststore as well as all but
+ // the
+ // last value of vecMin
+ output += store_unique_xor(laststore, vecMin, output);
+ laststore = vecMin;
+ }
+ uint32_t len = (uint32_t)(output - initoutput);
+
+ // we finish the rest off using a scalar algorithm
+ // could be improved?
+ // conditionally stores the last value of laststore as well as all but the
+ // last value of vecMax,
+ // we store to "buffer"
+ int leftoversize = store_unique_xor(laststore, vecMax, buffer);
+ // store_unique_xor never emits the last lane of its second argument, so
+ // lane 7 of vecMax is appended by hand unless it is paired with lane 6
+ uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
+ uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
+ if (vec7 != vec6) buffer[leftoversize++] = vec7;
+ if (pos1 == len1) {
+ // array1's blocks are all consumed: append its tail to the pending values
+ memcpy(buffer + leftoversize, array1 + 8 * pos1,
+ (length1 - 8 * len1) * sizeof(uint16_t));
+ leftoversize += length1 - 8 * len1;
+ if (leftoversize == 0) { // trivial case
+ // nothing pending: the unread part of array2 is copied through as is
+ memcpy(output, array2 + 8 * pos2,
+ (length2 - 8 * pos2) * sizeof(uint16_t));
+ len += (length2 - 8 * pos2);
+ } else {
+ // the concatenation is not necessarily ordered: sort it, cancel equal
+ // pairs, then xor with everything unread in array2
+ qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
+ leftoversize = unique_xor(buffer, leftoversize);
+ len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
+ length2 - 8 * pos2, output);
+ }
+ } else {
+ // otherwise array2's blocks are the exhausted ones: same steps, roles swapped
+ memcpy(buffer + leftoversize, array2 + 8 * pos2,
+ (length2 - 8 * len2) * sizeof(uint16_t));
+ leftoversize += length2 - 8 * len2;
+ if (leftoversize == 0) { // trivial case
+ memcpy(output, array1 + 8 * pos1,
+ (length1 - 8 * pos1) * sizeof(uint16_t));
+ len += (length1 - 8 * pos1);
+ } else {
+ qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
+ leftoversize = unique_xor(buffer, leftoversize);
+ len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
+ length1 - 8 * pos1, output);
+ }
+ }
+ return len;
+}
+CROARING_UNTARGET_REGION
+/**
+ * End of SIMD 16-bit XOR code
+ */
+
+#endif // CROARING_IS_X64
+
+/**
+ * Two-way merge of set_1 (size_1 values) and set_2 (size_2 values) into
+ * buffer, emitting a value present in both inputs only once.
+ *
+ * When one input is empty the other is copied to buffer verbatim. Once one
+ * input runs out during the merge, the remainder of the other is appended
+ * with a single memmove.
+ *
+ * Returns the number of values written to buffer.
+ */
+size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
+                    size_t size_2, uint32_t *buffer) {
+    if (size_2 == 0) {
+        memmove(buffer, set_1, size_1 * sizeof(uint32_t));
+        return size_1;
+    }
+    if (size_1 == 0) {
+        memmove(buffer, set_2, size_2 * sizeof(uint32_t));
+        return size_2;
+    }
+
+    size_t i = 0, j = 0, written = 0;
+    // Current heads; each is reloaded only after its own cursor advances.
+    uint32_t head_1 = set_1[0];
+    uint32_t head_2 = set_2[0];
+
+    for (;;) {
+        if (head_1 < head_2) {
+            buffer[written++] = head_1;
+            if (++i >= size_1) break;
+            head_1 = set_1[i];
+        } else if (head_2 < head_1) {
+            buffer[written++] = head_2;
+            if (++j >= size_2) break;
+            head_2 = set_2[j];
+        } else {
+            // same value on both sides: emit once, advance both cursors
+            buffer[written++] = head_1;
+            ++i;
+            ++j;
+            if (i >= size_1 || j >= size_2) break;
+            head_1 = set_1[i];
+            head_2 = set_2[j];
+        }
+    }
+
+    // At most one input still has unread values; append them in bulk.
+    if (i < size_1) {
+        const size_t rest = size_1 - i;
+        memmove(buffer + written, set_1 + i, rest * sizeof(uint32_t));
+        written += rest;
+    } else if (j < size_2) {
+        const size_t rest = size_2 - j;
+        memmove(buffer + written, set_2 + j, rest * sizeof(uint32_t));
+        written += rest;
+    }
+
+    return written;
+}
+
+/**
+ * Counts how many values a two-way merge of set_1 (size_1 values) and set_2
+ * (size_2 values) would produce, counting a value present at the head of both
+ * inputs only once. Nothing is written anywhere.
+ *
+ * Returns that count; if either input is empty it is the size of the other.
+ */
+size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
+                         const uint32_t *set_2, size_t size_2) {
+    size_t i = 0, j = 0, count = 0;
+
+    // Every iteration accounts for exactly one output value.
+    while (i < size_1 && j < size_2) {
+        const uint32_t head_1 = set_1[i];
+        const uint32_t head_2 = set_2[j];
+        if (head_1 < head_2) {
+            ++i;
+        } else if (head_2 < head_1) {
+            ++j;
+        } else {
+            ++i;
+            ++j;
+        }
+        ++count;
+    }
+
+    // At most one of these remainders is non-zero.
+    return count + (size_1 - i) + (size_2 - j);
+}
+
+
+
+/**
+ * Union of two uint16_t arrays into buffer, dispatching to union_vector16()
+ * when built for x64 and croaring_avx2() reports support, and to
+ * union_uint16() otherwise.
+ *
+ * Either back end is handed the strictly shorter array first; when the sizes
+ * are equal, set_2 goes first.
+ *
+ * Returns whatever the selected back end returns.
+ */
+size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
+                         size_t size_2, uint16_t *buffer) {
+    const bool set_1_first = size_1 < size_2;
+    const uint16_t *first = set_1_first ? set_1 : set_2;
+    const uint16_t *second = set_1_first ? set_2 : set_1;
+    const size_t first_len = set_1_first ? size_1 : size_2;
+    const size_t second_len = set_1_first ? size_2 : size_1;
+
+#ifdef CROARING_IS_X64
+    if (croaring_avx2()) {
+        return union_vector16(first, (uint32_t)first_len,
+                              second, (uint32_t)second_len, buffer);
+    }
+#endif
+    return union_uint16(first, first_len, second, second_len, buffer);
+}
+#ifdef CROARING_IS_X64
+CROARING_TARGET_AVX2
+/**
+ * Byte-wise equality of the n-byte ranges at s1 and s2.
+ *
+ * Compares 32 bytes at a time with unaligned AVX2 loads, then whatever is
+ * left 8 bytes at a time, then byte by byte.
+ *
+ * Returns true when all n bytes match (including n == 0), false at the first
+ * chunk or byte that differs.
+ */
+static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
+    const uint8_t *lhs = (const uint8_t *)s1;
+    const uint8_t *rhs = (const uint8_t *)s2;
+    size_t remaining = n;
+
+    // 32-byte chunks: one mask bit per byte, all ones means the chunk matches.
+    for (; remaining >= 32; remaining -= 32, lhs += 32, rhs += 32) {
+        const __m256i chunk_l = _mm256_loadu_si256((const __m256i *)lhs);
+        const __m256i chunk_r = _mm256_loadu_si256((const __m256i *)rhs);
+        const int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk_l, chunk_r));
+        if ((uint32_t)mask != UINT32_MAX) {
+            return false;
+        }
+    }
+
+    // 8-byte words; memcpy keeps the loads free of alignment assumptions.
+    for (; remaining >= 8; remaining -= 8, lhs += 8, rhs += 8) {
+        uint64_t word_l, word_r;
+        memcpy(&word_l, lhs, sizeof(uint64_t));
+        memcpy(&word_r, rhs, sizeof(uint64_t));
+        if (word_l != word_r) {
+            return false;
+        }
+    }
+
+    // Trailing bytes (fewer than 8).
+    for (; remaining > 0; --remaining, ++lhs, ++rhs) {
+        if (*lhs != *rhs) {
+            return false;
+        }
+    }
+
+    return true;
+}
+CROARING_UNTARGET_REGION
+#endif
+
+/**
+ * Reports whether the n-byte ranges at s1 and s2 hold identical bytes.
+ *
+ * n == 0 returns true without reading either pointer. On x64 builds where
+ * croaring_avx2() reports support the comparison goes through
+ * _avx2_memequals(); in every other case it is memcmp(s1, s2, n) == 0.
+ */
+bool memequals(const void *s1, const void *s2, size_t n) {
+    if (n == 0) {
+        return true;
+    }
+#ifdef CROARING_IS_X64
+    if (croaring_avx2()) {
+        return _avx2_memequals(s1, s2, n);
+    }
+#endif
+    return memcmp(s1, s2, n) == 0;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/array_util.c */
+/* begin file src/bitset_util.c */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+#ifdef CROARING_IS_X64
+// lengthTable[b] is the number of bits set in the byte value b (its population
+// count), for every b in 0..255: entry 0 is 0, entry 255 is 8.
+// NOTE(review): presumably used together with the vecDecodeTable rows to know
+// how many leading entries of a row are meaningful -- confirm at the use sites.
+static uint8_t lengthTable[256] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+#endif
+
+#ifdef CROARING_IS_X64
+// vecDecodeTable[b] lists, in increasing order, the 1-based positions of the
+// bits set in the byte value b, padded with zeros up to 8 entries. The comment
+// at the end of each row gives b in hexadecimal and in binary.
+// Example: b = 0x05 (bits 0 and 2 set) maps to {1, 3, 0, 0, 0, 0, 0, 0}.
+ALIGNED(32)
+static uint32_t vecDecodeTable[256][8] = {
+ {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
+ {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
+ {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
+ {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
+ {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
+ {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
+ {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
+ {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
+ {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
+ {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
+ {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
+ {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
+ {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
+ {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
+ {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
+ {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
+ {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
+ {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
+ {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
+ {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
+ {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
+ {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
+ {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
+ {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
+ {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
+ {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
+ {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
+ {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
+ {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
+ {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
+ {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
+ {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
+ {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
+ {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
+ {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
+ {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
+ {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
+ {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
+ {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
+ {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
+ {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
+ {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
+ {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
+ {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
+ {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
+ {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
+ {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
+ {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
+ {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
+ {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
+ {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
+ {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
+ {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
+ {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
+ {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
+ {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
+ {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
+ {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
+ {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
+ {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
+ {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
+ {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
+ {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
+ {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
+ {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
+ {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
+ {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
+ {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
+ {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
+ {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
+ {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
+ {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
+ {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
+ {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
+ {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
+ {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
+ {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
+ {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
+ {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
+ {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
+ {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
+ {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
+ {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
+ {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
+ {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
+ {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
+ {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
+ {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
+ {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
+ {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
+ {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
+ {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
+ {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
+ {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
+ {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
+ {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
+ {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
+ {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
+ {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
+ {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
+ {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
+ {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
+ {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
+ {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
+ {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
+ {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
+ {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
+ {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
+ {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
+ {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
+ {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
+ {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
+ {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
+ {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
+ {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
+ {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
+ {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
+ {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
+ {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
+ {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
+ {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
+ {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
+ {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
+ {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
+ {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
+ {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
+ {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
+ {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
+ {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
+ {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
+ {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
+ {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
+ {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
+ {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
+ {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
+ {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
+ {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
+ {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
+ {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
+ {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
+ {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
+ {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
+ {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
+ {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
+ {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
+ {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
+ {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
+ {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
+ {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
+ {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
+ {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
+ {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
+ {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
+ {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
+ {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
+ {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
+ {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
+ {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
+ {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
+ {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
+ {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
+ {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
+ {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
+ {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
+ {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
+ {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
+ {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
+ {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
+ {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
+ {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
+ {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
+ {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
+ {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
+ {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
+ {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
+ {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
+ {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
+ {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
+ {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
+ {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
+ {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
+ {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
+ {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
+ {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
+ {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
+ {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
+ {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
+ {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
+ {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
+ {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
+ {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
+ {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
+ {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
+ {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
+ {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
+ {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
+ {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
+ {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
+ {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
+ {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
+ {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
+ {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
+ {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
+ {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
+ {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
+ {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
+ {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
+ {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
+ {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
+ {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
+ {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
+ {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
+ {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
+ {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
+ {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
+ {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
+ {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
+ {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
+ {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
+ {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
+ {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
+ {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
+ {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
+ {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
+ {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
+ {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
+ {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
+ {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
+ {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
+ {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
+ {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
+ {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
+ {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
+ {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
+ {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
+ {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
+ {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
+ {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
+ {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
+ {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
+ {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
+ {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
+ {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
+ {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
+ {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
+ {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
+ {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
+ {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
+ {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
+ {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
+ {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
+ {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
+ {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
+ {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
+ {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
+ {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
+};
+
+#endif // #ifdef CROARING_IS_X64
+
+#ifdef CROARING_IS_X64
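+// Same layout as vecDecodeTable, but with 16-bit entries: row b lists the 1-based positions of the set bits of byte b, zero-padded to 8 entries.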
+ALIGNED(32)
+static uint16_t vecDecodeTable_uint16[256][8] = {
+ {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
+ {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
+ {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
+ {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
+ {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
+ {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
+ {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
+ {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
+ {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
+ {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
+ {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
+ {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
+ {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
+ {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
+ {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
+ {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
+ {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
+ {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
+ {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
+ {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
+ {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
+ {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
+ {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
+ {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
+ {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
+ {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
+ {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
+ {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
+ {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
+ {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
+ {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
+ {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
+ {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
+ {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
+ {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
+ {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
+ {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
+ {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
+ {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
+ {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
+ {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
+ {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
+ {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
+ {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
+ {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
+ {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
+ {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
+ {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
+ {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
+ {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
+ {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
+ {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
+ {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
+ {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
+ {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
+ {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
+ {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
+ {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
+ {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
+ {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
+ {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
+ {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
+ {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
+ {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
+ {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
+ {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
+ {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
+ {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
+ {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
+ {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
+ {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
+ {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
+ {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
+ {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
+ {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
+ {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
+ {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
+ {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
+ {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
+ {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
+ {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
+ {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
+ {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
+ {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
+ {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
+ {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
+ {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
+ {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
+ {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
+ {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
+ {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
+ {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
+ {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
+ {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
+ {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
+ {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
+ {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
+ {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
+ {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
+ {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
+ {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
+ {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
+ {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
+ {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
+ {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
+ {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
+ {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
+ {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
+ {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
+ {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
+ {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
+ {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
+ {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
+ {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
+ {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
+ {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
+ {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
+ {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
+ {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
+ {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
+ {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
+ {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
+ {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
+ {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
+ {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
+ {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
+ {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
+ {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
+ {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
+ {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
+ {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
+ {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
+ {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
+ {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
+ {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
+ {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
+ {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
+ {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
+ {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
+ {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
+ {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
+ {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
+ {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
+ {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
+ {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
+ {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
+ {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
+ {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
+ {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
+ {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
+ {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
+ {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
+ {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
+ {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
+ {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
+ {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
+ {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
+ {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
+ {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
+ {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
+ {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
+ {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
+ {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
+ {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
+ {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
+ {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
+ {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
+ {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
+ {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
+ {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
+ {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
+ {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
+ {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
+ {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
+ {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
+ {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
+ {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
+ {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
+ {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
+ {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
+ {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
+ {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
+ {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
+ {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
+ {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
+ {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
+ {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
+ {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
+ {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
+ {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
+ {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
+ {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
+ {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
+ {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
+ {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
+ {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
+ {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
+ {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
+ {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
+ {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
+ {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
+ {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
+ {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
+ {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
+ {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
+ {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
+ {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
+ {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
+ {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
+ {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
+ {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
+ {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
+ {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
+ {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
+ {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
+ {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
+ {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
+ {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
+ {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
+ {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
+ {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
+ {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
+ {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
+ {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
+ {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
+ {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
+ {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
+ {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
+ {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
+ {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
+ {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
+ {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
+ {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
+ {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
+ {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
+ {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
+ {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
+ {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
+ {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
+ {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
+ {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
+ {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
+ {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
+ {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
+ {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
+ {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
+ {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
+ {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
+ {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
+ {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
+ {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
+ {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
+ {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
+ {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
+ {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
+ {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
+};
+
+#endif
+
+#ifdef CROARING_IS_X64
+CROARING_TARGET_AVX2 /* Writes the positions of the set bits of words[0..length) to "out" as
+ * 32-bit values offset by "base", never writing more than "outcapacity"
+ * values, and returns the number of values written. Words are decoded two
+ * bytes at a time through vecDecodeTable while room for a full word (64
+ * values) remains; the rest is decoded one bit at a time. */
+size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
+ uint32_t *out, size_t outcapacity,
+ uint32_t base) {
+ uint32_t *initout = out; // kept to compute the number of values written
+ __m256i baseVec = _mm256_set1_epi32(base - 1); // table entries are 1-based, hence base - 1
+ __m256i incVec = _mm256_set1_epi32(64);
+ __m256i add8 = _mm256_set1_epi32(8);
+ uint32_t *safeout = out + outcapacity; // one past the last writable slot
+ size_t i = 0;
+ for (; (i < length) && (out + 64 <= safeout); ++i) { // vector path only while a whole word (64 values) fits
+ uint64_t w = words[i];
+ if (w == 0) {
+ baseVec = _mm256_add_epi32(baseVec, incVec); // empty word: skip its 64 positions
+ } else {
+ for (int k = 0; k < 4; ++k) { // 4 rounds x 2 bytes = 64 bits
+ uint8_t byteA = (uint8_t)w;
+ uint8_t byteB = (uint8_t)(w >> 8);
+ w >>= 16;
+ __m256i vecA =
+ _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]);
+ __m256i vecB =
+ _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]);
+ uint8_t advanceA = lengthTable[byteA]; // presumably the number of set bits in the byte (lengthTable is defined outside this view)
+ uint8_t advanceB = lengthTable[byteB];
+ vecA = _mm256_add_epi32(baseVec, vecA);
+ baseVec = _mm256_add_epi32(baseVec, add8);
+ vecB = _mm256_add_epi32(baseVec, vecB);
+ baseVec = _mm256_add_epi32(baseVec, add8);
+ _mm256_storeu_si256((__m256i *)out, vecA); // always stores 8 lanes; lanes past advanceA are overwritten by the next store
+ out += advanceA;
+ _mm256_storeu_si256((__m256i *)out, vecB);
+ out += advanceB;
+ }
+ }
+ }
+ base += i * 64; // bring the scalar base in line with the words already consumed
+ for (; (i < length) && (out < safeout); ++i) { // scalar tail: bounded by the exact capacity
+ uint64_t w = words[i];
+ while ((w != 0) && (out < safeout)) {
+ uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
+ int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+ uint32_t val = r + base;
+ memcpy(out, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ out++;
+ w ^= t; // clear the lowest set bit
+ }
+ base += 64;
+ }
+ return out - initout;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_IS_X64
+
+/*
+ * Scalar decoder: writes the position of every set bit of words[0..length)
+ * to "out" as 32-bit values offset by "base" and returns how many values
+ * were written. "out" must be large enough for all of them.
+ */
+size_t bitset_extract_setbits(const uint64_t *words, size_t length,
+                              uint32_t *out, uint32_t base) {
+    int written = 0;
+    for (size_t idx = 0; idx < length; ++idx, base += 64) {
+        // bits &= bits - 1 drops the lowest set bit on every round
+        for (uint64_t bits = words[idx]; bits != 0; bits &= bits - 1) {
+            const uint32_t position = (uint32_t)__builtin_ctzll(bits) + base;
+            // memcpy keeps the store valid even if "out" is not 4-byte aligned
+            memcpy(out + written, &position, sizeof(uint32_t));
+            ++written;
+        }
+    }
+    return written;
+}
+
+/*
+ * Writes to "out", as 16-bit values offset by "base", the position of every
+ * bit that is set in both words1[0..length) and words2[0..length).
+ * Returns the number of values written.
+ */
+size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
+                                                  const uint64_t * __restrict__ words2,
+                                                  size_t length, uint16_t *out,
+                                                  uint16_t base) {
+    int emitted = 0;
+    for (size_t idx = 0; idx < length; ++idx) {
+        uint64_t common = words1[idx] & words2[idx];
+        while (common != 0) {
+            out[emitted] = (uint16_t)(__builtin_ctzll(common) + base);
+            ++emitted;
+            common &= common - 1;  // drop the lowest set bit
+        }
+        base += 64;
+    }
+    return emitted;
+}
+
+#ifdef CROARING_IS_X64
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out" as 16-bit integers; values start at "base"
+ * (can be set to zero).
+ *
+ * At most "outcapacity" values are written, so "out" must have room for
+ * "outcapacity" 16-bit values.
+ *
+ * Returns how many values were actually decoded.
+ *
+ * This function uses SSE (128-bit) decoding: each byte of a word is looked
+ * up in vecDecodeTable_uint16 and stored as 8 lanes, of which only the
+ * leading ones are kept. Because one word can produce up to 64 values and
+ * every store writes 8 lanes, the vector path is only taken while at least
+ * 64 output slots remain; the remainder is decoded one bit at a time.
+ */
+CROARING_TARGET_AVX2
+size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
+                                         uint16_t *out, size_t outcapacity,
+                                         uint16_t base) {
+    uint16_t *initout = out;
+    __m128i baseVec = _mm_set1_epi16(base - 1);  // table entries are 1-based
+    __m128i incVec = _mm_set1_epi16(64);
+    __m128i add8 = _mm_set1_epi16(8);
+    uint16_t *safeout = out + outcapacity;
+    // A full word may emit 64 values; requiring that much room up front
+    // keeps every 8-lane store inside the output buffer.
+    const size_t bitsperword = 64;
+    size_t i = 0;
+    for (; (i < length) && ((size_t)(safeout - out) >= bitsperword); ++i) {
+        uint64_t w = words[i];
+        if (w == 0) {
+            baseVec = _mm_add_epi16(baseVec, incVec);
+        } else {
+            for (int k = 0; k < 4; ++k) {  // two bytes per round
+                uint8_t byteA = (uint8_t)w;
+                uint8_t byteB = (uint8_t)(w >> 8);
+                w >>= 16;
+                __m128i vecA = _mm_loadu_si128(
+                    (const __m128i *)vecDecodeTable_uint16[byteA]);
+                __m128i vecB = _mm_loadu_si128(
+                    (const __m128i *)vecDecodeTable_uint16[byteB]);
+                uint8_t advanceA = lengthTable[byteA];
+                uint8_t advanceB = lengthTable[byteB];
+                vecA = _mm_add_epi16(baseVec, vecA);
+                baseVec = _mm_add_epi16(baseVec, add8);
+                vecB = _mm_add_epi16(baseVec, vecB);
+                baseVec = _mm_add_epi16(baseVec, add8);
+                // Stores overlap on purpose: lanes beyond "advance" are
+                // overwritten by the following store.
+                _mm_storeu_si128((__m128i *)out, vecA);
+                out += advanceA;
+                _mm_storeu_si128((__m128i *)out, vecB);
+                out += advanceB;
+            }
+        }
+    }
+    base += (uint16_t)(i * 64);
+    for (; (i < length) && (out < safeout); ++i) {
+        uint64_t w = words[i];
+        while ((w != 0) && (out < safeout)) {
+            uint64_t t = w & (~w + 1);  // isolate the lowest set bit
+            int r = __builtin_ctzll(w);
+            *out = r + base;
+            out++;
+            w ^= t;
+        }
+        base += 64;
+    }
+    return out - initout;
+}
+CROARING_UNTARGET_REGION
+#endif
+
+/*
+ * Given a bitset containing "length" 64-bit words, write out the position
+ * of all the set bits to "out", values start at "base" (can be set to zero).
+ *
+ * The "out" pointer should be sufficient to store the actual number of bits
+ *set.
+ *
+ * Returns how many values were actually decoded.
+ */
+size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
+                                     uint16_t *out, uint16_t base) {
+    int written = 0;
+    for (size_t idx = 0; idx < length; ++idx, base += 64) {
+        // peel set bits from the least significant end
+        for (uint64_t bits = words[idx]; bits != 0; bits &= bits - 1) {
+            out[written] = (uint16_t)(__builtin_ctzll(bits) + base);
+            ++written;
+        }
+    }
+    return written;
+}
+
+#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64)
+
+/*
+ * Sets in "words" every bit position listed in list[0..length) and returns
+ * "card" increased by the number of bits that were previously clear.
+ *
+ * Implemented with shrx/bts inline assembly. The asm reads "list" and
+ * rewrites "words" through pointers the compiler cannot see, so it declares
+ * a "memory" clobber (as _asm_bitset_clear_list does).
+ */
+static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card,
+                                                     const uint16_t *list, uint64_t length) {
+    uint64_t offset, load, pos;
+    uint64_t shift = 6;
+    const uint16_t *end = list + length;
+    if (!length) return card;  // the asm loop runs its body before testing
+    // TODO: could unroll for performance, see bitset_set_list
+    // bts is not available as an intrinsic in GCC
+    __asm volatile(
+        "1:\n"
+        "movzwq (%[list]), %[pos]\n"             // pos = *list
+        "shrx %[shift], %[pos], %[offset]\n"     // offset = pos >> 6
+        "mov (%[words],%[offset],8), %[load]\n"  // load = words[offset]
+        "bts %[pos], %[load]\n"                  // set bit; CF = old bit
+        "mov %[load], (%[words],%[offset],8)\n"  // words[offset] = load
+        "sbb $-1, %[card]\n"                     // card += 1 - CF
+        "add $2, %[list]\n"                      // next uint16_t
+        "cmp %[list], %[end]\n"
+        "jnz 1b"
+        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
+          [pos] "=&r"(pos), [offset] "=&r"(offset)
+        : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)
+        : /* clobbers */ "memory", "cc");
+    return card;
+}
+
+/*
+ * Sets bit "pos" of the bitset "words" with shrx/bts inline assembly.
+ * The "memory" clobber tells the compiler that "words" is modified.
+ */
+static inline void _asm_bitset_set_bit(uint64_t *words, uint64_t pos) {
+    const uint64_t shift = 6;
+    uint64_t offset, load;
+    __asm volatile(
+        "shrx %[shift], %[pos], %[offset]\n"     // offset = pos >> 6
+        "mov (%[words],%[offset],8), %[load]\n"  // load = words[offset]
+        "bts %[pos], %[load]\n"                  // set bit (pos mod 64)
+        "mov %[load], (%[words],%[offset],8)"    // words[offset] = load
+        : [load] "=&r"(load), [offset] "=&r"(offset)
+        : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)
+        : "memory", "cc");
+}
+
+/*
+ * Sets in "words" every bit position listed in list[0..length).
+ * The main loop handles four values per iteration; the tail loop handles
+ * what is left.
+ */
+static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+    const uint16_t *end = list + length;
+
+    // Compare distances rather than forming list + 3, which would point
+    // outside the array when fewer than three values remain.
+    for (; end - list >= 4; list += 4) {
+        _asm_bitset_set_bit(words, list[0]);
+        _asm_bitset_set_bit(words, list[1]);
+        _asm_bitset_set_bit(words, list[2]);
+        _asm_bitset_set_bit(words, list[3]);
+    }
+
+    for (; list != end; ++list) {
+        _asm_bitset_set_bit(words, list[0]);
+    }
+}
+
+static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+ uint64_t length) { /* Clears in "words" every bit position listed in list[0..length) and
+ * returns "card" decreased by the number of bits that were set before. */
+ uint64_t offset, load, pos;
+ uint64_t shift = 6;
+ const uint16_t *end = list + length;
+ if (!length) return card; // the asm loop below runs its body before testing, so it must not see an empty list
+ // btr is not available as an intrinsic in GCC
+ __asm volatile(
+ "1:\n"
+ "movzwq (%[list]), %[pos]\n" // pos = *list, zero-extended
+ "shrx %[shift], %[pos], %[offset]\n" // offset = pos >> 6 (word index)
+ "mov (%[words],%[offset],8), %[load]\n" // load = words[offset]
+ "btr %[pos], %[load]\n" // clear bit (pos mod 64); CF = previous bit value
+ "mov %[load], (%[words],%[offset],8)\n" // words[offset] = load
+ "sbb $0, %[card]\n" // card -= CF
+ "add $2, %[list]\n" // advance to the next uint16_t
+ "cmp %[list], %[end]\n"
+ "jnz 1b" // repeat until list == end
+ : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
+ [pos] "=&r"(pos), [offset] "=&r"(offset)
+ : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)
+ :
+ /* clobbers */ "memory");
+ return card;
+}
+
+/*
+ * Portable version: clears in "words" every bit position listed in
+ * list[0..length) and returns "card" minus the number of bits that were set.
+ */
+static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+                                                 uint64_t length) {
+    for (uint64_t k = 0; k < length; ++k) {
+        const uint64_t value = list[k];
+        const uint64_t bit = value % 64;
+        uint64_t *slot = &words[value >> 6];
+        const uint64_t before = *slot;
+        const uint64_t after = before & ~(UINT64_C(1) << bit);
+        card -= (before ^ after) >> bit;  // 1 when the bit changed, else 0
+        *slot = after;
+    }
+    return card;
+}
+
+/*
+ * Portable version: sets in "words" every bit position listed in
+ * list[0..length) and returns "card" plus the number of bits newly set.
+ */
+static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card,
+                                                        const uint16_t *list, uint64_t length) {
+    for (uint64_t k = 0; k < length; ++k) {
+        const uint64_t value = list[k];
+        const uint64_t bit = value % 64;
+        uint64_t *slot = &words[value >> 6];
+        const uint64_t before = *slot;
+        const uint64_t after = before | (UINT64_C(1) << bit);
+        card += (before ^ after) >> bit;  // 1 when the bit changed, else 0
+        *slot = after;
+    }
+    return card;
+}
+
+/* Portable version: sets in "words" every bit position listed in list[0..length). */
+static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+    for (uint64_t k = 0; k < length; ++k) {
+        const uint64_t value = list[k];
+        words[value >> 6] |= UINT64_C(1) << (value % 64);
+    }
+}
+
+/* Clears the listed bits, using the asm kernel when croaring_avx2() reports support. */
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+                           uint64_t length) {
+    return croaring_avx2()
+               ? _asm_bitset_clear_list(words, card, list, length)
+               : _scalar_bitset_clear_list(words, card, list, length);
+}
+
+/* Sets the listed bits and tracks cardinality, using the asm kernel when croaring_avx2() reports support. */
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+                                  const uint16_t *list, uint64_t length) {
+    return croaring_avx2()
+               ? _asm_bitset_set_list_withcard(words, card, list, length)
+               : _scalar_bitset_set_list_withcard(words, card, list, length);
+}
+
+/* Sets the listed bits, using the asm kernel when croaring_avx2() reports support. */
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+    if (!croaring_avx2()) {
+        _scalar_bitset_set_list(words, list, length);
+        return;
+    }
+    _asm_bitset_set_list(words, list, length);
+}
+#else
+/*
+ * Clears in "words" every bit position listed in list[0..length) and
+ * returns "card" minus the number of bits that were set.
+ */
+uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
+                           uint64_t length) {
+    const uint16_t *stop = list + length;
+    for (const uint16_t *cur = list; cur != stop; ++cur) {
+        const uint64_t value = *cur;
+        const uint64_t word_index = value >> 6;
+        const uint64_t bit = value % 64;
+        const uint64_t old_word = words[word_index];
+        const uint64_t new_word = old_word & ~(UINT64_C(1) << bit);
+        words[word_index] = new_word;
+        card -= (old_word ^ new_word) >> bit;  // 1 only if the bit was set
+    }
+    return card;
+}
+
+/*
+ * Sets in "words" every bit position listed in list[0..length) and
+ * returns "card" plus the number of bits newly set.
+ */
+uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
+                                  const uint16_t *list, uint64_t length) {
+    const uint16_t *stop = list + length;
+    for (const uint16_t *cur = list; cur != stop; ++cur) {
+        const uint64_t value = *cur;
+        const uint64_t word_index = value >> 6;
+        const uint64_t bit = value % 64;
+        const uint64_t old_word = words[word_index];
+        const uint64_t new_word = old_word | (UINT64_C(1) << bit);
+        words[word_index] = new_word;
+        card += (old_word ^ new_word) >> bit;  // 1 only if the bit was clear
+    }
+    return card;
+}
+
+/* Sets in "words" every bit position listed in list[0..length). */
+void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+    const uint16_t *stop = list + length;
+    for (const uint16_t *cur = list; cur != stop; ++cur) {
+        const uint64_t value = *cur;
+        words[value >> 6] |= UINT64_C(1) << (value % 64);
+    }
+}
+
+#endif
+
+/* flip specified bits */
+/* TODO: consider whether worthwhile to make an asm version */
+
+/*
+ * Flips in "words" every bit position listed in list[0..length) and returns
+ * the updated cardinality: +1 for each bit turned on, -1 for each bit
+ * turned off.
+ */
+uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
+                                   const uint16_t *list, uint64_t length) {
+    for (uint64_t k = 0; k < length; ++k) {
+        const uint64_t value = list[k];
+        const uint64_t bit = value % 64;
+        uint64_t *slot = &words[value >> 6];
+        const uint64_t was_set = (*slot >> bit) & UINT64_C(1);
+        // branch-free update: adds 1 when the bit was clear, and adds
+        // (uint64_t)-1, i.e. subtracts 1, when it was set
+        card += UINT64_C(1) - 2 * was_set;
+        *slot ^= UINT64_C(1) << bit;
+    }
+    return card;
+}
+
+/* Flips in "words" every bit position listed in list[0..length). */
+void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
+    for (uint64_t k = 0; k < length; ++k) {
+        const uint64_t value = list[k];
+        words[value >> 6] ^= UINT64_C(1) << (value % 64);
+    }
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/bitset_util.c */
+/* begin file src/containers/array.c */
+/*
+ * array.c
+ *
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+extern inline uint16_t array_container_minimum(const array_container_t *arr);
+extern inline uint16_t array_container_maximum(const array_container_t *arr);
+extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x);
+
+extern inline int array_container_rank(const array_container_t *arr,
+ uint16_t x);
+extern inline bool array_container_contains(const array_container_t *arr,
+ uint16_t pos);
+extern inline int array_container_cardinality(const array_container_t *array);
+extern inline bool array_container_nonzero_cardinality(const array_container_t *array);
+extern inline int32_t array_container_serialized_size_in_bytes(int32_t card);
+extern inline bool array_container_empty(const array_container_t *array);
+extern inline bool array_container_full(const array_container_t *array);
+
+/*
+ * Allocates an empty array container with room for "size" values.
+ * A non-positive size yields a container without backing storage
+ * (array == NULL). Returns NULL if an allocation fails.
+ */
+array_container_t *array_container_create_given_capacity(int32_t size) {
+    array_container_t *fresh =
+        (array_container_t *)roaring_malloc(sizeof(array_container_t));
+    if (fresh == NULL) {
+        return NULL;
+    }
+
+    fresh->array = NULL;  // we don't want to rely on malloc(0)
+    if (size > 0) {
+        fresh->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) * size);
+        if (fresh->array == NULL) {
+            roaring_free(fresh);
+            return NULL;
+        }
+    }
+
+    fresh->capacity = size;
+    fresh->cardinality = 0;
+    return fresh;
+}
+
+/*
+ * Create a new array container with the default initial capacity
+ * (ARRAY_DEFAULT_INIT_SIZE). Return NULL in case of failure.
+ */
+array_container_t *array_container_create(void) {  // (void): a proper prototype, not an unspecified parameter list
+    return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
+}
+
+/*
+ * Create a new array containing all values in [min,max).
+ * Returns NULL if the container cannot be allocated.
+ */
+array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
+    array_container_t *filled =
+        array_container_create_given_capacity(max - min + 1);
+    if (filled == NULL) {
+        return NULL;
+    }
+    filled->cardinality = 0;
+    uint32_t value = min;
+    while (value < max) {
+        filled->array[filled->cardinality] = value;
+        filled->cardinality++;
+        value++;
+    }
+    return filled;
+}
+
+/*
+ * Duplicate container: returns a new container with the same capacity and
+ * the same values as "src", or NULL if allocation fails.
+ */
+array_container_t *array_container_clone(const array_container_t *src) {
+    array_container_t *newcontainer =
+        array_container_create_given_capacity(src->capacity);
+    if (newcontainer == NULL) return NULL;
+
+    newcontainer->cardinality = src->cardinality;
+
+    // A container with no capacity has array == NULL; passing NULL to
+    // memcpy is undefined even for a zero-byte copy, so skip it.
+    if (src->cardinality > 0) {
+        memcpy(newcontainer->array, src->array,
+               src->cardinality * sizeof(uint16_t));
+    }
+
+    return newcontainer;
+}
+
+/*
+ * Adds "offset" to every value of "c" and splits the result in two new
+ * containers: the first lo_cap values (as counted by count_less against
+ * 65536 - offset) go to *loc, the remaining ones to *hic.
+ *
+ * Either output pointer may be NULL, in which case that half is not built.
+ * An output is only written when its half is non-empty; if the allocation
+ * for a half fails, NULL is stored in the corresponding output.
+ */
+void array_container_offset(const array_container_t *c,
+                            container_t **loc, container_t **hic,
+                            uint16_t offset) {
+    array_container_t *lo = NULL, *hi = NULL;
+    int top, lo_cap, hi_cap;
+
+    top = (1 << 16) - offset;
+
+    lo_cap = count_less(c->array, c->cardinality, top);
+    if (loc && lo_cap) {
+        lo = array_container_create_given_capacity(lo_cap);
+        if (lo != NULL) {  // do not dereference a failed allocation
+            for (int i = 0; i < lo_cap; ++i) {
+                array_container_add(lo, c->array[i] + offset);
+            }
+        }
+        *loc = (container_t*)lo;
+    }
+
+    hi_cap = c->cardinality - lo_cap;
+    if (hic && hi_cap) {
+        hi = array_container_create_given_capacity(hi_cap);
+        if (hi != NULL) {  // do not dereference a failed allocation
+            for (int i = lo_cap; i < c->cardinality; ++i) {
+                array_container_add(hi, c->array[i] + offset);
+            }
+        }
+        *hic = (container_t*)hi;
+    }
+}
+
+/*
+ * Shrinks the backing array so that capacity == cardinality.
+ * Returns the number of 16-bit slots released, or 0 when nothing changed.
+ * If the reallocation fails the container is left exactly as it was.
+ */
+int array_container_shrink_to_fit(array_container_t *src) {
+    if (src->cardinality == src->capacity) return 0;  // nothing to do
+    const int savings = src->capacity - src->cardinality;
+    if (src->cardinality == 0) {  // we do not want to rely on realloc for zero allocs
+        roaring_free(src->array);
+        src->array = NULL;
+        src->capacity = 0;
+        return savings;
+    }
+    uint16_t *shrunk = (uint16_t *)roaring_realloc(
+        src->array, src->cardinality * sizeof(uint16_t));
+    if (shrunk == NULL) {
+        // realloc leaves the old block valid on failure: keep using it
+        // instead of freeing live data and leaving array == NULL.
+        return 0;
+    }
+    src->array = shrunk;
+    src->capacity = src->cardinality;
+    return savings;
+}
+
+/*
+ * Free the container and its backing array.
+ * Passing NULL is allowed and does nothing.
+ */
+void array_container_free(array_container_t *arr) {
+    if (arr == NULL) return;
+    if (arr->array != NULL) {  // Jon Strabala reports that some tools complain otherwise
+        roaring_free(arr->array);
+        arr->array = NULL;  // pedantic
+    }
+    roaring_free(arr);
+}
+
+/*
+ * Next capacity to try when growing: the default size for an empty
+ * container, then x2 below 64, x1.5 below 1024 and x1.25 above that.
+ */
+static inline int32_t grow_capacity(int32_t capacity) {
+    if (capacity <= 0) {
+        return ARRAY_DEFAULT_INIT_SIZE;
+    }
+    if (capacity < 64) {
+        return capacity * 2;
+    }
+    if (capacity < 1024) {
+        return capacity * 3 / 2;
+    }
+    return capacity * 5 / 4;
+}
+
+/* Limits "val" to [min, max]; the lower bound is checked first. */
+static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
+    if (val < min) {
+        return min;
+    }
+    if (val > max) {
+        return max;
+    }
+    return val;
+}
+
+/*
+ * Grows the backing array to at least "min" slots. The new capacity is
+ * grow_capacity(current) clamped to [min, DEFAULT_MAX_SIZE], or to
+ * [min, 65536] when min exceeds DEFAULT_MAX_SIZE. When "preserve" is true
+ * the content is kept (realloc); otherwise the old buffer is dropped and a
+ * fresh one is allocated. An allocation failure is reported on stderr and
+ * trips the assertion.
+ */
+void array_container_grow(array_container_t *container, int32_t min,
+                          bool preserve) {
+    const int32_t ceiling = (min > DEFAULT_MAX_SIZE) ? 65536 : DEFAULT_MAX_SIZE;
+    const int32_t wanted =
+        clamp(grow_capacity(container->capacity), min, ceiling);
+    uint16_t *previous = container->array;
+    uint16_t *replacement;
+
+    container->capacity = wanted;
+
+    if (!preserve) {
+        // Jon Strabala reports that some tools complain otherwise
+        if (previous != NULL) {
+            roaring_free(previous);
+        }
+        replacement = (uint16_t *)roaring_malloc(wanted * sizeof(uint16_t));
+    } else {
+        replacement =
+            (uint16_t *)roaring_realloc(previous, wanted * sizeof(uint16_t));
+        if (replacement == NULL) roaring_free(previous);
+    }
+    container->array = replacement;
+
+    // handle the case where the allocation fails
+    if (replacement == NULL) {
+        fprintf(stderr, "could not allocate memory\n");
+    }
+    assert(container->array != NULL);
+}
+
+/* Copy one container into another. We assume that they are distinct.
+ * `dst` is grown (without preserving its contents) when it is too small. */
+void array_container_copy(const array_container_t *src,
+                          array_container_t *dst) {
+    const int32_t cardinality = src->cardinality;
+    if (cardinality > dst->capacity) {
+        array_container_grow(dst, cardinality, false);
+    }
+
+    dst->cardinality = cardinality;
+    /* An empty container may carry a NULL array (array_container_shrink_to_fit
+     * sets it to NULL at capacity 0); memcpy must not be handed NULL pointers
+     * even for a zero length, so skip the call when there is nothing to copy. */
+    if (cardinality > 0) {
+        memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
+    }
+}
+
+/* Append min, min + step, min + 2*step, ... (all values strictly below max)
+ * to `arr`. A zero step is rejected: it would never advance and loop forever,
+ * which matches the guard in bitset_container_add_from_range. */
+void array_container_add_from_range(array_container_t *arr, uint32_t min,
+                                    uint32_t max, uint16_t step) {
+    uint32_t value;
+    if (step == 0) return;  // refuse to loop forever
+    for (value = min; value < max; value += step) {
+        array_container_append(arr, value);
+    }
+}
+
+/* Computes the union of array_1 and array_2 and writes the result to out.
+ * It is assumed that out is distinct from both array_1 and array_2.
+ */
+void array_container_union(const array_container_t *array_1,
+                           const array_container_t *array_2,
+                           array_container_t *out) {
+    const int32_t n1 = array_1->cardinality;
+    const int32_t n2 = array_2->cardinality;
+    /* Worst case: the two inputs share no value. */
+    const int32_t upper_bound = n1 + n2;
+
+    if (upper_bound > out->capacity) {
+        array_container_grow(out, upper_bound, false);
+    }
+
+    out->cardinality = (int32_t)fast_union_uint16(array_1->array, n1,
+                                                  array_2->array, n2,
+                                                  out->array);
+}
+
+/* Computes the difference of array_1 and array_2 (values of array_1 that are
+ * not in array_2) and writes the result to out.
+ * out does not need to be distinct from array_1.
+ */
+void array_container_andnot(const array_container_t *array_1,
+                            const array_container_t *array_2,
+                            array_container_t *out) {
+    if (out->capacity < array_1->cardinality) {
+        array_container_grow(out, array_1->cardinality, false);
+    }
+
+#ifdef CROARING_IS_X64
+    /* The vectorized kernel is only selected when out aliases neither input. */
+    if ((croaring_avx2()) && (out != array_1) && (out != array_2)) {
+        out->cardinality = difference_vector16(
+            array_1->array, array_1->cardinality, array_2->array,
+            array_2->cardinality, out->array);
+        return;
+    }
+#endif
+
+    out->cardinality = difference_uint16(
+        array_1->array, array_1->cardinality, array_2->array,
+        array_2->cardinality, out->array);
+}
+
+/* Computes the symmetric difference of array_1 and array_2 and writes the
+ * result to out.
+ * It is assumed that out is distinct from both array_1 and array_2.
+ */
+void array_container_xor(const array_container_t *array_1,
+                         const array_container_t *array_2,
+                         array_container_t *out) {
+    const int32_t n1 = array_1->cardinality;
+    const int32_t n2 = array_2->cardinality;
+    /* Worst case: the two inputs share no value. */
+    const int32_t upper_bound = n1 + n2;
+
+    if (upper_bound > out->capacity) {
+        array_container_grow(out, upper_bound, false);
+    }
+
+#ifdef CROARING_IS_X64
+    if (croaring_avx2()) {
+        out->cardinality =
+            xor_vector16(array_1->array, array_1->cardinality,
+                         array_2->array, array_2->cardinality, out->array);
+        return;
+    }
+#endif
+
+    out->cardinality =
+        xor_uint16(array_1->array, array_1->cardinality, array_2->array,
+                   array_2->cardinality, out->array);
+}
+
+/* Smaller of two 32-bit signed integers (returns b when they are equal). */
+static inline int32_t minimum_int32(int32_t a, int32_t b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/* Computes the intersection of array1 and array2 and writes the result to
+ * out.
+ * It is assumed that out is distinct from both array1 and array2.
+ *
+ * When one input is more than `threshold` times larger than the other, the
+ * skewed kernel is used with the smaller array first; otherwise the regular
+ * (vectorized on x64 with AVX2) kernel runs.
+ * */
+void array_container_intersection(const array_container_t *array1,
+                                  const array_container_t *array2,
+                                  array_container_t *out) {
+    const int32_t card_1 = array1->cardinality;
+    const int32_t card_2 = array2->cardinality;
+    const int32_t min_card = minimum_int32(card_1, card_2);
+    const int threshold = 64;  // subject to tuning
+
+    if (out->capacity < min_card) {
+#ifdef CROARING_IS_X64
+        /* x64 builds request one extra 128-bit vector worth of slots. */
+        array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
+                             false);
+#else
+        array_container_grow(out, min_card, false);
+#endif
+    }
+
+    if (card_1 * threshold < card_2) {
+        out->cardinality = intersect_skewed_uint16(
+            array1->array, card_1, array2->array, card_2, out->array);
+        return;
+    }
+    if (card_2 * threshold < card_1) {
+        out->cardinality = intersect_skewed_uint16(
+            array2->array, card_2, array1->array, card_1, out->array);
+        return;
+    }
+
+#ifdef CROARING_IS_X64
+    if (croaring_avx2()) {
+        out->cardinality = intersect_vector16(
+            array1->array, card_1, array2->array, card_2, out->array);
+        return;
+    }
+#endif
+
+    out->cardinality = intersect_uint16(array1->array, card_1,
+                                        array2->array, card_2, out->array);
+}
+
+/* Computes the size of the intersection of array1 and array2 without
+ * materializing it. Kernel selection mirrors array_container_intersection.
+ * */
+int array_container_intersection_cardinality(const array_container_t *array1,
+                                             const array_container_t *array2) {
+    const int32_t card_1 = array1->cardinality;
+    const int32_t card_2 = array2->cardinality;
+    const int threshold = 64;  // subject to tuning
+
+    if (card_1 * threshold < card_2) {
+        return intersect_skewed_uint16_cardinality(array1->array, card_1,
+                                                   array2->array, card_2);
+    }
+    if (card_2 * threshold < card_1) {
+        return intersect_skewed_uint16_cardinality(array2->array, card_2,
+                                                   array1->array, card_1);
+    }
+
+#ifdef CROARING_IS_X64
+    if (croaring_avx2()) {
+        return intersect_vector16_cardinality(array1->array, card_1,
+                                              array2->array, card_2);
+    }
+#endif
+
+    return intersect_uint16_cardinality(array1->array, card_1,
+                                        array2->array, card_2);
+}
+
+/* Returns the result of the matching *_nonempty kernel for the two arrays.
+ * Strongly skewed sizes go to the skewed kernel (smaller array first). */
+bool array_container_intersect(const array_container_t *array1,
+                               const array_container_t *array2) {
+    const int32_t card_1 = array1->cardinality;
+    const int32_t card_2 = array2->cardinality;
+    const int threshold = 64;  // subject to tuning
+
+    if (card_1 * threshold < card_2) {
+        return intersect_skewed_uint16_nonempty(array1->array, card_1,
+                                                array2->array, card_2);
+    }
+    if (card_2 * threshold < card_1) {
+        return intersect_skewed_uint16_nonempty(array2->array, card_2,
+                                                array1->array, card_1);
+    }
+    // we do not bother vectorizing
+    return intersect_uint16_nonempty(array1->array, card_1,
+                                     array2->array, card_2);
+}
+
+/* Computes the intersection of src_1 and src_2 and writes the result back
+ * into src_1 (its array is both an input and the output buffer).
+ * */
+void array_container_intersection_inplace(array_container_t *src_1,
+                                          const array_container_t *src_2) {
+    // todo: can any of this be vectorized?
+    const int32_t card_1 = src_1->cardinality;
+    const int32_t card_2 = src_2->cardinality;
+    const int threshold = 64;  // subject to tuning
+    int32_t result_card;
+
+    if (card_1 * threshold < card_2) {
+        result_card = intersect_skewed_uint16(
+            src_1->array, card_1, src_2->array, card_2, src_1->array);
+    } else if (card_2 * threshold < card_1) {
+        result_card = intersect_skewed_uint16(
+            src_2->array, card_2, src_1->array, card_1, src_1->array);
+    } else {
+        result_card = intersect_uint16(
+            src_1->array, card_1, src_2->array, card_2, src_1->array);
+    }
+    src_1->cardinality = result_card;
+}
+
+/* Writes base + value for every stored value, in order, as 32-bit integers
+ * to `vout` and returns how many were written. memcpy is used for the store
+ * so that `vout` does not have to be aligned for uint32_t. */
+ALLOW_UNALIGNED
+int array_container_to_uint32_array(void *vout, const array_container_t *cont,
+                                    uint32_t base) {
+    uint32_t *dest = (uint32_t *)vout;
+    int written = 0;
+
+    while (written < cont->cardinality) {
+        const uint32_t widened = base + cont->array[written];
+        // should be compiled as a MOV on x64
+        memcpy(dest + written, &widened, sizeof(uint32_t));
+        written++;
+    }
+    return written;
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/* Prints the stored values to stdout as "{a,b,c}" ("{}" when empty). */
+void array_container_printf(const array_container_t *v) {
+    int idx = 1;
+
+    if (v->cardinality == 0) {
+        printf("{}");
+        return;
+    }
+
+    printf("{");
+    printf("%d", v->array[0]);
+    while (idx < v->cardinality) {
+        printf(",%d", v->array[idx]);
+        idx++;
+    }
+    printf("}");
+}
+
+/* Prints value + base for every stored value to stdout, comma separated and
+ * without surrounding braces. Prints nothing for an empty container. */
+void array_container_printf_as_uint32_array(const array_container_t *v,
+                                            uint32_t base) {
+    int idx = 1;
+
+    if (v->cardinality == 0) {
+        return;
+    }
+
+    printf("%u", v->array[0] + base);
+    while (idx < v->cardinality) {
+        printf(",%u", v->array[idx] + base);
+        idx++;
+    }
+}
+#endif
+
+/* Compute the number of runs, i.e. how many stored values are not exactly
+ * one greater than the value stored right before them. */
+int32_t array_container_number_of_runs(const array_container_t *ac) {
+    // Can SIMD work here?
+    int32_t runs = 0;
+    int32_t last = -2;  /* chosen so that no first value can equal last + 1 */
+    int32_t k;
+
+    for (k = 0; k < ac->cardinality; k++) {
+        const int32_t current = ac->array[k];
+        if (current != last + 1) {
+            runs++;
+        }
+        last = current;
+    }
+    return runs;
+}
+
+/**
+ * Writes the underlying array to buf, outputs how many bytes were written.
+ * The number of bytes written should be
+ * array_container_size_in_bytes(container).
+ *
+ * Nothing is copied for an empty container: its array may be NULL (see
+ * array_container_shrink_to_fit) and memcpy must not receive a NULL source
+ * even when the length is zero.
+ */
+int32_t array_container_write(const array_container_t *container, char *buf) {
+    if (container->cardinality > 0) {
+        memcpy(buf, container->array,
+               container->cardinality * sizeof(uint16_t));
+    }
+    return array_container_size_in_bytes(container);
+}
+
+/* Returns true when every value of container1 also appears in container2.
+ * Both arrays are walked in lockstep; the walk relies on them being sorted
+ * in increasing order. */
+bool array_container_is_subset(const array_container_t *container1,
+                               const array_container_t *container2) {
+    const int32_t n1 = container1->cardinality;
+    const int32_t n2 = container2->cardinality;
+    int a = 0, b = 0;
+
+    if (n1 > n2) {
+        return false;  /* a larger set cannot be a subset */
+    }
+
+    while (a < n1 && b < n2) {
+        const uint16_t x = container1->array[a];
+        const uint16_t y = container2->array[b];
+        if (x < y) {
+            return false;  /* x was skipped over in container2 */
+        }
+        if (x == y) {
+            a++;
+        }
+        b++;
+    }
+    return a == n1;
+}
+
+/* Loads `cardinality` 16-bit values from `buf` into `container`, growing it
+ * first when needed, and returns array_container_size_in_bytes(container).
+ * Nothing is copied when `cardinality` is not positive: in that case no grow
+ * happens, so the destination array may still be NULL, and memcpy must not
+ * be handed a NULL pointer even for a zero length. */
+int32_t array_container_read(int32_t cardinality, array_container_t *container,
+                             const char *buf) {
+    if (container->capacity < cardinality) {
+        array_container_grow(container, cardinality, false);
+    }
+    container->cardinality = cardinality;
+    if (cardinality > 0) {
+        memcpy(container->array, buf,
+               container->cardinality * sizeof(uint16_t));
+    }
+
+    return array_container_size_in_bytes(container);
+}
+
+/* Calls iterator(value + base, ptr) for each stored value in order.
+ * Stops and returns false as soon as the callback returns false; returns
+ * true when all values were visited. */
+bool array_container_iterate(const array_container_t *cont, uint32_t base,
+                             roaring_iterator iterator, void *ptr) {
+    int idx = 0;
+    while (idx < cont->cardinality) {
+        if (!iterator(cont->array[idx] + base, ptr)) {
+            return false;
+        }
+        idx++;
+    }
+    return true;
+}
+
+/* 64-bit flavour of array_container_iterate: the callback receives
+ * high_bits OR-ed with the 32-bit sum value + base.
+ * Stops and returns false as soon as the callback returns false. */
+bool array_container_iterate64(const array_container_t *cont, uint32_t base,
+                               roaring_iterator64 iterator, uint64_t high_bits,
+                               void *ptr) {
+    int idx = 0;
+    while (idx < cont->cardinality) {
+        const uint64_t low_bits = (uint64_t)(cont->array[idx] + base);
+        if (!iterator(high_bits | low_bits, ptr)) {
+            return false;
+        }
+        idx++;
+    }
+    return true;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/array.c */
+/* begin file src/containers/bitset.c */
+/*
+ * bitset.c
+ *
+ */
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+extern inline int bitset_container_cardinality(const bitset_container_t *bitset);
+extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
+// unused at this time:
+//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_get(const bitset_container_t *bitset,
+ uint16_t pos);
+extern inline int32_t bitset_container_serialized_size_in_bytes(void);
+extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
+extern inline bool bitset_container_contains(const bitset_container_t *bitset,
+ uint16_t pos);
+
+/* Zero every word of the bitset and reset its cardinality to 0. */
+void bitset_container_clear(bitset_container_t *bitset) {
+    const size_t nbytes = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    bitset->cardinality = 0;
+    memset(bitset->words, 0, nbytes);
+}
+
+/* Set every bit of the bitset; the cardinality becomes 65536. */
+void bitset_container_set_all(bitset_container_t *bitset) {
+    const size_t nbytes = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    /* memset stores its value argument as an unsigned char: all ones. */
+    memset(bitset->words, 0xFF, nbytes);
+    bitset->cardinality = (1 << 16);
+}
+
+
+
+/* Create a new, empty bitset container. Return NULL in case of failure.
+ * The word array is allocated with 32-byte alignment. */
+bitset_container_t *bitset_container_create(void) {
+    const size_t words_bytes =
+        sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    bitset_container_t *fresh =
+        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
+
+    if (fresh == NULL) {
+        return NULL;
+    }
+
+    // sizeof(__m256i) == 32
+    fresh->words = (uint64_t *)roaring_aligned_malloc(32, words_bytes);
+    if (fresh->words == NULL) {
+        roaring_free(fresh);
+        return NULL;
+    }
+
+    bitset_container_clear(fresh);
+    return fresh;
+}
+
+/* Copy the words and the cardinality of `source` into `dest`.
+ * We assume that the two containers are distinct. */
+void bitset_container_copy(const bitset_container_t *source,
+                           bitset_container_t *dest) {
+    const size_t nbytes = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    memcpy(dest->words, source->words, nbytes);
+    dest->cardinality = source->cardinality;
+}
+
+/* Set the bits min, min + step, min + 2*step, ... (strictly below max).
+ *
+ * When step divides 64 the same bit pattern repeats in every word, so a
+ * single mask is built once and stored word by word. Note that this fast
+ * path assigns the cardinality and (except in the single-word case) assigns
+ * whole words instead of OR-ing them, so it appears to expect an empty
+ * container -- confirm against the callers.
+ *
+ * A zero step or an empty range (min >= max) is a no-op. Without the range
+ * check, `max - 1` and `max - min` wrap around in the fast path, producing an
+ * out-of-bounds word index and a bogus cardinality. */
+void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
+                                     uint32_t max, uint16_t step) {
+    if (step == 0) return;   // refuse to crash
+    if (min >= max) return;  // empty range, nothing to add
+    if ((64 % step) == 0) {  // step divides 64
+        uint64_t mask = 0;   // construct the repeated mask
+        uint32_t value;
+        for (value = (min % step); value < 64; value += step) {
+            mask |= ((uint64_t)1 << value);
+        }
+        uint32_t firstword = min / 64;
+        uint32_t endword = (max - 1) / 64;
+        uint32_t i;
+        bitset->cardinality = (max - min + step - 1) / step;
+        if (firstword == endword) {
+            /* Range lies within one word: trim the mask on both sides. */
+            bitset->words[firstword] |=
+                mask & (((~UINT64_C(0)) << (min % 64)) &
+                        ((~UINT64_C(0)) >> ((~max + 1) % 64)));
+            return;
+        }
+        /* First word: drop the bits below min. */
+        bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
+        for (i = firstword + 1; i < endword; i++)
+            bitset->words[i] = mask;
+        /* Last word: drop the bits at or above max. */
+        bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
+    } else {
+        uint32_t value;
+        for (value = min; value < max; value += step) {
+            bitset_container_add(bitset, value);
+        }
+    }
+}
+
+/* Free the container and its aligned word array.
+ * Passing NULL is a no-op, mirroring the behaviour of free(NULL). */
+void bitset_container_free(bitset_container_t *bitset) {
+    if (bitset == NULL) {
+        return;
+    }
+    if (bitset->words != NULL) {  // Jon Strabala reports that some tools complain otherwise
+        roaring_aligned_free(bitset->words);
+        bitset->words = NULL;  // pedantic
+    }
+    roaring_free(bitset);
+}
+
+/* Duplicate a container: allocates a new bitset with a 32-byte aligned word
+ * array and copies the words and the cardinality of `src` into it.
+ * Returns NULL when either allocation fails. */
+bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
+    const size_t words_bytes =
+        sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    bitset_container_t *copy =
+        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
+
+    if (copy == NULL) {
+        return NULL;
+    }
+
+    // sizeof(__m256i) == 32
+    copy->words = (uint64_t *)roaring_aligned_malloc(32, words_bytes);
+    if (copy->words == NULL) {
+        roaring_free(copy);
+        return NULL;
+    }
+
+    memcpy(copy->words, src->words, words_bytes);
+    copy->cardinality = src->cardinality;
+    return copy;
+}
+
+/* Shift the bits of `c` up by `offset` positions and split the result in two
+ * new bitset containers: the bits that stay below 65536 form the low
+ * container (*loc), the bits shifted past the end form the high container
+ * (*hic). Either output pointer may be NULL to skip that half. An output is
+ * only written when the corresponding container is non-empty; empty
+ * containers are freed (or reused for the other half) instead of returned.
+ *
+ * bitset_container_create() returns NULL on failure; in that case the
+ * function returns early and leaves the not-yet-written outputs untouched. */
+void bitset_container_offset(const bitset_container_t *c,
+                             container_t **loc, container_t **hic,
+                             uint16_t offset) {
+    bitset_container_t *bc = NULL;
+    uint64_t val;
+    uint16_t b, i, end;
+
+    b = offset >> 6;  /* whole 64-bit words to shift by */
+    i = offset % 64;  /* remaining bit shift inside a word */
+    end = 1024 - b;   /* number of source words that feed the low container */
+
+    if (loc != NULL) {
+        bc = bitset_container_create();
+        if (bc == NULL) {
+            return;  /* allocation failed */
+        }
+        if (i == 0) {
+            memcpy(bc->words+b, c->words, 8*end);
+        } else {
+            uint32_t k;
+            bc->words[b] = c->words[0] << i;
+            for (k = 1; k < end; ++k) {
+                val = c->words[k] << i;
+                val |= c->words[k-1] >> (64 - i);
+                bc->words[b+k] = val;
+            }
+        }
+
+        bc->cardinality = bitset_container_compute_cardinality(bc);
+        if (bc->cardinality != 0) {
+            *loc = bc;
+        }
+        if (bc->cardinality == c->cardinality) {
+            /* Every bit landed in the low half. If that means "no bits at
+             * all", bc was not handed out and must be released here. */
+            if (bc->cardinality == 0) {
+                bitset_container_free(bc);
+            }
+            return;
+        }
+    }
+
+    if (hic == NULL) {
+        /* bc is only NULL here when loc was NULL as well. */
+        if (bc != NULL && bc->cardinality == 0) {
+            bitset_container_free(bc);
+        }
+        return;
+    }
+
+    /* Reuse the (all-zero) low container when it turned out empty. */
+    if (bc == NULL || bc->cardinality != 0) {
+        bc = bitset_container_create();
+        if (bc == NULL) {
+            return;  /* allocation failed */
+        }
+    }
+
+    if (i == 0) {
+        memcpy(bc->words, c->words+end, 8*b);
+    } else {
+        uint32_t k;
+
+        for (k = end; k < 1024; ++k) {
+            val = c->words[k] << i;
+            val |= c->words[k-1] >> (64 - i);
+            bc->words[k-end] = val;
+        }
+        bc->words[b] = c->words[1023] >> (64 - i);
+    }
+
+    bc->cardinality = bitset_container_compute_cardinality(bc);
+    if (bc->cardinality == 0) {
+        bitset_container_free(bc);
+        return;
+    }
+    *hic = bc;
+}
+
+/* Forward [begin, end) to bitset_set_range on the word array, then recount
+ * the cardinality from scratch. */
+void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
+                                uint32_t end) {
+    int recounted;
+
+    bitset_set_range(bitset->words, begin, end);
+    recounted = bitset_container_compute_cardinality(bitset);  // could be smarter
+    bitset->cardinality = recounted;
+}
+
+
+/* Returns true as soon as a word with a common set bit is found, false when
+ * the two bitsets share no bit. */
+bool bitset_container_intersect(const bitset_container_t *src_1,
+                                const bitset_container_t *src_2) {
+    // could vectorize, but this is probably already quite fast in practice
+    const uint64_t * __restrict__ lhs = src_1->words;
+    const uint64_t * __restrict__ rhs = src_2->words;
+    int w = 0;
+
+    while (w < BITSET_CONTAINER_SIZE_IN_WORDS) {
+        if ((lhs[w] & rhs[w]) != 0) {
+            return true;
+        }
+        w++;
+    }
+    return false;
+}
+
+
+#ifdef CROARING_IS_X64
+#ifndef WORDS_IN_AVX2_REG
+#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
+#endif
+/* Get the number of bits set (force computation): sums hamming() over every
+ * word of the bitset, without using vector instructions. */
+static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+    const uint64_t *cursor = bitset->words;
+    const uint64_t *const stop = bitset->words + BITSET_CONTAINER_SIZE_IN_WORDS;
+    int32_t total = 0;
+
+    while (cursor != stop) {
+        total += hamming(*cursor);
+        cursor++;
+    }
+    return total;
+}
+/* Get the number of bits set (force computation). Uses the AVX2 Harley-Seal
+ * popcount when croaring_avx2() reports support, the scalar loop otherwise. */
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+    if (!croaring_avx2()) {
+        return _scalar_bitset_container_compute_cardinality(bitset);
+    }
+    return (int) avx2_harley_seal_popcount256(
+        (const __m256i *)bitset->words,
+        BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
+}
+
+#elif defined(USENEON)
+/* Get the number of bits set (force computation), NEON flavour.
+ * Eight words are processed per iteration; per-byte bit counts are pairwise
+ * added into four 16-bit accumulators that are widened and summed at the end. */
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+    const uint64_t *words = bitset->words;
+    uint16x8_t acc0 = vdupq_n_u16(0);
+    uint16x8_t acc1 = vdupq_n_u16(0);
+    uint16x8_t acc2 = vdupq_n_u16(0);
+    uint16x8_t acc3 = vdupq_n_u16(0);
+
+    for (size_t w = 0; w < BITSET_CONTAINER_SIZE_IN_WORDS; w += 8) {
+        const uint64x2_t v0 = vld1q_u64(&words[w + 0]);
+        acc0 = vaddq_u16(acc0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(v0))));
+        const uint64x2_t v1 = vld1q_u64(&words[w + 2]);
+        acc1 = vaddq_u16(acc1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(v1))));
+        const uint64x2_t v2 = vld1q_u64(&words[w + 4]);
+        acc2 = vaddq_u16(acc2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(v2))));
+        const uint64x2_t v3 = vld1q_u64(&words[w + 6]);
+        acc3 = vaddq_u16(acc3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(v3))));
+    }
+
+    /* Widen 16 -> 32 -> 64 bits and fold the four accumulators together. */
+    uint64x2_t total = vdupq_n_u64(0);
+    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc0)));
+    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc1)));
+    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc2)));
+    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc3)));
+    return vgetq_lane_u64(total, 0) + vgetq_lane_u64(total, 1);
+}
+
+#else // CROARING_IS_X64
+
+/* Get the number of bits set (force computation): portable version that sums
+ * hamming() over every word of the bitset. */
+int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+    const uint64_t *cursor = bitset->words;
+    const uint64_t *const stop = bitset->words + BITSET_CONTAINER_SIZE_IN_WORDS;
+    int32_t total = 0;
+
+    while (cursor != stop) {
+        total += hamming(*cursor);
+        cursor++;
+    }
+    return total;
+}
+
+#endif // CROARING_IS_X64
+
+#ifdef CROARING_IS_X64
+
+#define BITSET_CONTAINER_FN_REPEAT 8
+#ifndef WORDS_IN_AVX2_REG
+#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
+#endif // WORDS_IN_AVX2_REG
+#define LOOP_SIZE \
+ BITSET_CONTAINER_SIZE_IN_WORDS / \
+ ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT)
+
+/* Computes a binary operation (e.g. union) on bitset1 and bitset2 and writes the
+ * result to bitsetout.
+ *
+ * AVX_BITSET_CONTAINER_FN1 expands to
+ *   static inline int _avx2_bitset_container_<opname>_nocard(src_1, src_2, dst)
+ * which walks the word arrays as raw bytes. Each loop iteration handles eight
+ * 32-byte vectors (256 bytes) with unaligned loads and stores and applies
+ * avx_intrinsic(A2, A1), i.e. the vector from src_2 is the FIRST operand --
+ * this ordering matters for non-commutative intrinsics such as andnot.
+ * The cardinality is not computed: dst->cardinality is set to
+ * BITSET_UNKNOWN_CARDINALITY and that value is returned.
+ * The parameters before, opsymbol, neon_intrinsic and after are not
+ * referenced in the expansion.
+ */
+// clang-format off
+#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ static inline int _avx2_bitset_container_##opname##_nocard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \
+ const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \
+ /* not using the blocking optimization for some reason*/ \
+ uint8_t *out = (uint8_t *)dst->words; \
+ const int innerloop = 8; \
+ for (size_t i = 0; \
+ i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \
+ i += innerloop) { \
+ __m256i A1, A2, AO; \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)out, AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 32), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 64), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 96), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 128), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 160), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 192), AO); \
+ A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \
+ A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \
+ AO = avx_intrinsic(A2, A1); \
+ _mm256_storeu_si256((__m256i *)(out + 224), AO); \
+ out += 256; \
+ words_1 += 256; \
+ words_2 += 256; \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+ return dst->cardinality; \
+ }
+
+/* AVX_BITSET_CONTAINER_FN2 expands to
+ *   static inline int _avx2_bitset_container_<opname>(src_1, src_2, dst)
+ * which stores the result in dst->words AND updates dst->cardinality, both
+ * through avx2_harley_seal_popcount256andstore_<opname>. The inputs are
+ * forwarded as (words_2, words_1), the same operand order used by the
+ * _nocard variant. The parameters before, opsymbol, avx_intrinsic,
+ * neon_intrinsic and after are not referenced in the expansion.
+ * The macro ends on the closing brace: no trailing line continuation, so it
+ * does not depend on the line that follows it being blank. */
+#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \
+                                 neon_intrinsic, after) \
+    /* next, a version that updates cardinality*/ \
+    static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \
+                                                      const bitset_container_t *src_2, \
+                                                      bitset_container_t *dst) { \
+        const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \
+        const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \
+        __m256i *out = (__m256i *)dst->words; \
+        dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \
+            words_2, words_1, out, \
+            BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
+        return dst->cardinality; \
+    }
+
+#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \
+ neon_intrinsic, after) \
+ /* next, a version that just computes the cardinality: it expands to \
+  * _avx2_bitset_container_<opname>_justcard(src_1, src_2), which forwards \
+  * (data2, data1) to avx2_harley_seal_popcount256_<opname> and returns the \
+  * count without writing to either container. The parameters before, \
+  * opsymbol, avx_intrinsic, neon_intrinsic and after are not referenced. */ \
+ static inline int _avx2_bitset_container_##opname##_justcard( \
+ const bitset_container_t *src_1, const bitset_container_t *src_2) { \
+ const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \
+ const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \
+ return (int)avx2_harley_seal_popcount256_##opname( \
+ data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \
+ }
+
+
+// _nocard AVX2 kernels. "or" and "union" expand to the same code under two names: other containers use the "or" term, so both names are provided to keep the API consistent.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// "and" and "intersection" are likewise the same code under two names, for API consistency with the other containers.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// Cardinality-updating AVX2 kernels. "or" and "union" expand to the same code under two names: other containers use the "or" term, so both names are provided to keep the API consistent.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// "and" and "intersection" are likewise the same code under two names, for API consistency with the other containers.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// Cardinality-only (_justcard) AVX2 kernels. "or" and "union" expand to the same code under two names: other containers use the "or" term, so both names are provided to keep the API consistent.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// "and" and "intersection" are likewise the same code under two names, for API consistency with the other containers.
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX2
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+
+/* SCALAR_BITSET_CONTAINER_FN generates the three non-vectorized variants of a
+ * word-wise binary operation `(a) opsymbol (b)`:
+ *   _scalar_bitset_container_<opname>           stores the result, updates
+ *                                               and returns the cardinality
+ *   _scalar_bitset_container_<opname>_nocard    stores the result, marks the
+ *                                               cardinality as unknown
+ *   _scalar_bitset_container_<opname>_justcard  only returns the cardinality
+ * The avx_intrinsic and neon_intrinsic parameters are not referenced. */
+#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \
+                                   neon_intrinsic) \
+    static inline int _scalar_bitset_container_##opname( \
+        const bitset_container_t *src_1, const bitset_container_t *src_2, \
+        bitset_container_t *dst) { \
+        const uint64_t *__restrict__ lhs = src_1->words; \
+        const uint64_t *__restrict__ rhs = src_2->words; \
+        uint64_t *result = dst->words; \
+        int32_t bits = 0; \
+        size_t w = 0; \
+        /* two words per step; both are computed before either is stored */ \
+        while (w < BITSET_CONTAINER_SIZE_IN_WORDS) { \
+            const uint64_t even = (lhs[w])opsymbol(rhs[w]); \
+            const uint64_t odd = (lhs[w + 1])opsymbol(rhs[w + 1]); \
+            result[w] = even; \
+            result[w + 1] = odd; \
+            bits += hamming(even); \
+            bits += hamming(odd); \
+            w += 2; \
+        } \
+        dst->cardinality = bits; \
+        return dst->cardinality; \
+    } \
+    static inline int _scalar_bitset_container_##opname##_nocard( \
+        const bitset_container_t *src_1, const bitset_container_t *src_2, \
+        bitset_container_t *dst) { \
+        const uint64_t *__restrict__ lhs = src_1->words; \
+        const uint64_t *__restrict__ rhs = src_2->words; \
+        uint64_t *result = dst->words; \
+        size_t w = 0; \
+        while (w < BITSET_CONTAINER_SIZE_IN_WORDS) { \
+            result[w] = (lhs[w])opsymbol(rhs[w]); \
+            w++; \
+        } \
+        dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
+        return dst->cardinality; \
+    } \
+    static inline int _scalar_bitset_container_##opname##_justcard( \
+        const bitset_container_t *src_1, const bitset_container_t *src_2) { \
+        const uint64_t *__restrict__ lhs = src_1->words; \
+        const uint64_t *__restrict__ rhs = src_2->words; \
+        int32_t bits = 0; \
+        size_t w = 0; \
+        while (w < BITSET_CONTAINER_SIZE_IN_WORDS) { \
+            const uint64_t even = (lhs[w])opsymbol(rhs[w]); \
+            const uint64_t odd = (lhs[w + 1])opsymbol(rhs[w + 1]); \
+            bits += hamming(even); \
+            bits += hamming(odd); \
+            w += 2; \
+        } \
+        return bits; \
+    }
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
+SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
+SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
+
+SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
+SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
+
+
+/* BITSET_CONTAINER_FN (x64 build) generates the public entry points
+ *   bitset_container_<opname>           result + cardinality
+ *   bitset_container_<opname>_nocard    result only
+ *   bitset_container_<opname>_justcard  cardinality only
+ * Each one dispatches at run time to the _avx2_ or the _scalar_ kernel.
+ * All three use croaring_avx2() for the decision so that the variants of one
+ * operation cannot disagree on which kernel family they select.
+ * NOTE(review): this assumes croaring_avx2() is the file's wrapper around the
+ * croaring_detect_supported_architectures() & CROARING_AVX2 test that
+ * _justcard used to spell out -- confirm against its definition.
+ * The opsymbol, avx_intrinsic and neon_intrinsic parameters are not
+ * referenced here. */
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \
+    int bitset_container_##opname(const bitset_container_t *src_1, \
+                                  const bitset_container_t *src_2, \
+                                  bitset_container_t *dst) { \
+        if ( croaring_avx2() ) { \
+            return _avx2_bitset_container_##opname(src_1, src_2, dst); \
+        } else { \
+            return _scalar_bitset_container_##opname(src_1, src_2, dst); \
+        } \
+    } \
+    int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+                                           const bitset_container_t *src_2, \
+                                           bitset_container_t *dst) { \
+        if ( croaring_avx2() ) { \
+            return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+        } else { \
+            return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \
+        } \
+    } \
+    int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+                                             const bitset_container_t *src_2) { \
+        if ( croaring_avx2() ) { \
+            return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \
+        } else { \
+            return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \
+        } \
+    }
+
+
+
+#elif defined(USENEON)
+
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) /* NEON variant: emits bitset_container_<opname>, _<opname>_nocard and _<opname>_justcard; only neon_intrinsic is used here */ \
+int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ uint16x8_t n0 = vdupq_n_u16(0); /* n0..n3: four independent popcount accumulators with 16-bit lanes */ \
+ uint16x8_t n1 = vdupq_n_u16(0); \
+ uint16x8_t n2 = vdupq_n_u16(0); \
+ uint16x8_t n3 = vdupq_n_u16(0); \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { /* eight 64-bit words (four 128-bit vectors) per iteration */ \
+ uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0])); \
+ n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); /* per-byte bit counts, pairwise-added into 16-bit lanes */ \
+ vst1q_u64(&out[i + 0], c0); \
+ uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2])); \
+ n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
+ vst1q_u64(&out[i + 2], c1); \
+ uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4])); \
+ n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
+ vst1q_u64(&out[i + 4], c2); \
+ uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6])); \
+ n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
+ vst1q_u64(&out[i + 6], c3); \
+ } \
+ uint64x2_t n = vdupq_n_u64(0); /* widen each accumulator to 64-bit lanes and add them up */ \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
+ dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); /* total = sum of the two 64-bit lanes */ \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { /* same combine-and-store as above, without any counting */ \
+ vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0]))); \
+ vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2]))); \
+ vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4]))); \
+ vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6]))); \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; /* the result's cardinality is left unknown */ \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint16x8_t n0 = vdupq_n_u16(0); /* counting only: the combined words are never stored */ \
+ uint16x8_t n1 = vdupq_n_u16(0); \
+ uint16x8_t n2 = vdupq_n_u16(0); \
+ uint16x8_t n3 = vdupq_n_u16(0); \
+ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \
+ uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \
+ vld1q_u64(&words_2[i + 0])); \
+ n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \
+ uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \
+ vld1q_u64(&words_2[i + 2])); \
+ n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \
+ uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \
+ vld1q_u64(&words_2[i + 4])); \
+ n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \
+ uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \
+ vld1q_u64(&words_2[i + 6])); \
+ n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \
+ } \
+ uint64x2_t n = vdupq_n_u64(0); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \
+ n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \
+ return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \
+}
+
+#else
+
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) /* portable variant: emits bitset_container_<opname>, _<opname>_nocard and _<opname>_justcard; only opsymbol is used here */ \
+int bitset_container_##opname(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ int32_t sum = 0; /* running count of set bits in the result */ \
+ size_t i; \
+ for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { /* two words per iteration */ \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
+ out[i] = word_1; \
+ out[i + 1] = word_2; \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ dst->cardinality = sum; \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2, \
+ bitset_container_t *dst) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ uint64_t *out = dst->words; \
+ size_t i; \
+ for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { /* combine word by word, no counting */ \
+ out[i] = (words_1[i])opsymbol(words_2[i]); \
+ } \
+ dst->cardinality = BITSET_UNKNOWN_CARDINALITY; /* the result's cardinality is left unknown */ \
+ return dst->cardinality; \
+} \
+int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
+ const bitset_container_t *src_2) { \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_2 = src_2->words; \
+ int32_t sum = 0; /* counting only: the combined words are never stored */ \
+ size_t i; \
+ for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
+ const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \
+ word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
+ sum += hamming(word_1); \
+ sum += hamming(word_2); \
+ } \
+ return sum; \
+}
+
+#endif // CROARING_IS_X64
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
+BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
+BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
+
+BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
+BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
+// clang-format On
+
+
+ALLOW_UNALIGNED
+int bitset_container_to_uint32_array(
+    uint32_t *out,
+    const bitset_container_t *bc,
+    uint32_t base
+){
+    /* Hands the words of bc to a set-bit extraction helper that fills out,
+     * passing base along, and returns the helper's result narrowed to int. */
+#ifdef CROARING_IS_X64
+    /* heuristic: the AVX2 helper is only chosen for large cardinalities */
+    if (( croaring_avx2() ) && (bc->cardinality >= 8192)) {
+        return (int) bitset_extract_setbits_avx2(
+            bc->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+            out, bc->cardinality, base);
+    }
+#endif
+    return (int) bitset_extract_setbits(
+        bc->words, BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/*
+ * Debug helper: prints the positions of all set bits, in increasing order,
+ * as a comma-separated list wrapped in braces.
+ */
+void bitset_container_printf(const bitset_container_t * v) {
+    bool need_comma = false;
+    uint32_t offset = 0;
+    int idx;
+    printf("{");
+    for (idx = 0; idx < BITSET_CONTAINER_SIZE_IN_WORDS; ++idx, offset += 64) {
+        uint64_t bits;
+        /* peel set bits off from least to most significant */
+        for (bits = v->words[idx]; bits != 0; bits &= bits - 1) {
+            const int pos = __builtin_ctzll(bits);
+            if (need_comma) {
+                printf(",%u", offset + pos);
+            } else {
+                printf("%u", offset + pos);
+                need_comma = true;
+            }
+        }
+    }
+    printf("}");
+}
+
+
+/*
+ * Debug helper: prints every set bit position, offset by base, as a
+ * comma-separated list of unsigned integers (no surrounding braces).
+ */
+void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
+    bool need_comma = false;
+    int idx = 0;
+    while (idx < BITSET_CONTAINER_SIZE_IN_WORDS) {
+        uint64_t bits = v->words[idx];
+        /* peel set bits off from least to most significant */
+        for (; bits != 0; bits &= bits - 1) {
+            const int pos = __builtin_ctzll(bits);
+            if (need_comma) {
+                printf(",%u", pos + base);
+            } else {
+                printf("%u", pos + base);
+                need_comma = true;
+            }
+        }
+        base += 64;
+        ++idx;
+    }
+}
+#endif
+
+
+// TODO: use the fast lower bound, also
+int bitset_container_number_of_runs(bitset_container_t *bc) {
+    /* Counts the maximal runs of consecutive set bits by counting run ends. */
+    const uint64_t *w = bc->words;
+    const int last = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
+    uint64_t cur;
+    int total = 0;
+    int k;
+
+    for (k = 0; k < last; ++k) {
+        cur = w[k];
+        /* run ends inside the word: a set bit directly followed by a clear one */
+        total += hamming((~cur) & (cur << 1));
+        /* run ends on the word boundary: top bit set, next word's low bit clear */
+        total += (int)((cur >> 63) & ~w[k + 1]);
+    }
+
+    /* final word: a run touching the top bit ends with the container */
+    cur = w[last];
+    total += hamming((~cur) & (cur << 1));
+    total += (int)(cur >> 63);
+    return total;
+}
+
+
+int32_t bitset_container_write(const bitset_container_t *container,
+                               char *buf) {
+    /* Copies the raw word array into buf and returns the value of
+     * bitset_container_size_in_bytes() for this container. */
+    const size_t payload = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    memcpy(buf, container->words, payload);
+    return bitset_container_size_in_bytes(container);
+}
+
+
+int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
+                              const char *buf) {
+    /* Fills the word array from buf, records the caller-supplied cardinality
+     * as-is, and returns bitset_container_size_in_bytes() for the container. */
+    const size_t payload = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
+    container->cardinality = cardinality;
+    memcpy(container->words, buf, payload);
+    return bitset_container_size_in_bytes(container);
+}
+
+bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
+    /* Calls iterator(position + base, ptr) for each set bit in increasing
+     * order; returns false as soon as the callback returns false. */
+    int32_t idx = 0;
+    while (idx < BITSET_CONTAINER_SIZE_IN_WORDS) {
+        uint64_t pending = cont->words[idx];
+        /* clearing the lowest set bit advances to the next element */
+        for (; pending != 0; pending &= pending - 1) {
+            const int bit = __builtin_ctzll(pending);
+            if (!iterator(bit + base, ptr)) {
+                return false;
+            }
+        }
+        base += 64;
+        ++idx;
+    }
+    return true;
+}
+
+bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
+    /* Like the 32-bit variant, but each value handed to the callback is
+     * (position + base) widened to 64 bits and OR-ed with high_bits. */
+    int32_t idx = 0;
+    while (idx < BITSET_CONTAINER_SIZE_IN_WORDS) {
+        uint64_t pending = cont->words[idx];
+        /* clearing the lowest set bit advances to the next element */
+        for (; pending != 0; pending &= pending - 1) {
+            const int bit = __builtin_ctzll(pending);
+            const uint64_t value = high_bits | (uint64_t)(bit + base);
+            if (!iterator(value, ptr)) {
+                return false;
+            }
+        }
+        base += 64;
+        ++idx;
+    }
+    return true;
+}
+
+#ifdef CROARING_IS_X64
+CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
+static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
+    /* Compares the two word arrays 32 bytes at a time using unaligned loads. */
+    const __m256i *lhs = (const __m256i*)container1->words;
+    const __m256i *rhs = (const __m256i*)container2->words;
+    const size_t n_vectors = BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32;
+    size_t v = 0;
+    while (v < n_vectors) {
+        const __m256i same = _mm256_cmpeq_epi8(_mm256_loadu_si256(lhs + v),
+                                              _mm256_loadu_si256(rhs + v));
+        /* one mask bit per byte: anything short of all 32 set is a mismatch */
+        if ((uint32_t)_mm256_movemask_epi8(same) != UINT32_MAX) {
+            return false;
+        }
+        ++v;
+    }
+    return true;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_IS_X64
+
+ALLOW_UNALIGNED
+bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
+    /* Cardinalities are only used as a shortcut when both are known. */
+    const bool cards_known =
+        (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) &&
+        (container2->cardinality != BITSET_UNKNOWN_CARDINALITY);
+    if (cards_known && (container1->cardinality != container2->cardinality)) {
+        return false;  /* different element counts can never be equal */
+    }
+    if (cards_known && (container1->cardinality == INT32_C(0x10000))) {
+        return true;   /* both report 0x10000 elements: no need to compare words */
+    }
+#ifdef CROARING_IS_X64
+    if( croaring_avx2() ) {
+        return _avx2_bitset_container_equals(container1, container2);
+    }
+#endif
+    {
+        const size_t n_bytes = BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t);
+        return 0 == memcmp(container1->words, container2->words, n_bytes);
+    }
+}
+
+bool bitset_container_is_subset(const bitset_container_t *container1,
+                                const bitset_container_t *container2) {
+    /* True when every bit set in container1 is also set in container2. */
+    const uint64_t *inner = container1->words;
+    const uint64_t *outer = container2->words;
+    int32_t k;
+    /* with both cardinalities known, a larger set cannot be a subset */
+    if ((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) &&
+        (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) &&
+        (container1->cardinality > container2->cardinality)) {
+        return false;
+    }
+    for (k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS; ++k) {
+        /* any bit of container1 missing from container2 disproves it */
+        if ((inner[k] & ~outer[k]) != 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
+bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { // Looks for the value whose rank equals `rank`, where *start_rank is the rank of this container's first element.
+ int card = bitset_container_cardinality(container);
+ if(rank >= *start_rank + card) { // the wanted rank lies beyond this container
+ *start_rank += card; // account for every element skipped here
+ return false;
+ }
+ const uint64_t *words = container->words;
+ int32_t size;
+ int i;
+ for (i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
+ size = hamming(words[i]);
+ if(rank <= *start_rank + size) { // the target may be in this word: walk its set bits one at a time
+ uint64_t w = container->words[i];
+ uint16_t base = i*64;
+ while (w != 0) {
+ uint64_t t = w & (~w + 1); // isolates the lowest set bit
+ int r = __builtin_ctzll(w);
+ if(*start_rank == rank) {
+ *element = r+base; // value is relative to this container (no high bits added)
+ return true;
+ }
+ w ^= t; // drop that bit and move on
+ *start_rank += 1;
+ }
+ }
+ else
+ *start_rank += size; // skip the whole word at once
+ }
+ assert(false); // not expected: the cardinality check above placed the rank inside this container
+ __builtin_unreachable();
+}
+
+
+/* Returns the smallest value (assumes not empty); an empty container
+ * yields UINT16_MAX. */
+uint16_t bitset_container_minimum(const bitset_container_t *container) {
+    const uint64_t *cursor = container->words;
+    int32_t idx = 0;
+    /* skip the leading all-zero words */
+    while (idx < BITSET_CONTAINER_SIZE_IN_WORDS && cursor[idx] == 0) {
+        ++idx;
+    }
+    if (idx == BITSET_CONTAINER_SIZE_IN_WORDS) {
+        return UINT16_MAX;
+    }
+    return __builtin_ctzll(cursor[idx]) + idx * 64;
+}
+
+/* Returns the largest value stored in the container.
+ * The container is assumed to be non-empty; an empty one yields 0. */
+uint16_t bitset_container_maximum(const bitset_container_t *container) {
+    int32_t i;
+    /* Scan from the highest word down to and including word 0. Stopping at
+     * i > 0 would skip the first word, so a container whose set bits all lie
+     * in 0..63 would wrongly report 0. */
+    for (i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i >= 0; --i) {
+        const uint64_t w = container->words[i];
+        if (w != 0) {
+            /* index of the highest set bit within this word */
+            const int r = __builtin_clzll(w);
+            return (uint16_t)(i * 64 + 63 - r);
+        }
+    }
+    return 0;
+}
+
+/* Returns the number of values equal or smaller than x */
+int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
+    // credit: aqrit
+    const int last_word = x / 64;
+    const uint64_t bit = UINT64_C(1) << (x % 64);
+    /* every bit up to and including `bit`; wraps to all-ones for bit 63 */
+    const uint64_t keep = bit + bit - 1;
+    int count = hamming(container->words[last_word] & keep);
+    int w;
+    /* all complete words below the one holding x */
+    for (w = 0; w < last_word; w++) {
+        count += hamming(container->words[w]);
+    }
+    return count;
+}
+
+/* Returns the index of the first value equal or larger than x, or -1 */
+int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
+    const uint32_t target = x;
+    uint32_t word_idx = target / 64;
+    const int shift = target % 64;  // in [0,64)
+    /* ignore the bits below x inside its own word */
+    uint64_t candidates = container->words[word_idx] & (~UINT64_C(0) << shift);
+    /* move forward until a word with a surviving bit is found */
+    for (; candidates == 0; candidates = container->words[word_idx]) {
+        if (++word_idx == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
+    }
+    return word_idx * 64 + __builtin_ctzll(candidates);
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/bitset.c */
+/* begin file src/containers/containers.c */
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+extern inline const container_t *container_unwrap_shared(
+ const container_t *candidate_shared_container, uint8_t *type);
+
+extern inline container_t *container_mutable_unwrap_shared(
+ container_t *candidate_shared_container, uint8_t *type);
+
+extern inline int container_get_cardinality(
+ const container_t *c, uint8_t typecode);
+
+extern inline container_t *container_iand(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_ior(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_ixor(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_iandnot(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+void container_free(container_t *c, uint8_t type) {
+    /* Hands c to the free routine matching its type tag; any other tag is
+     * treated as unreachable. */
+    if (type == BITSET_CONTAINER_TYPE) {
+        bitset_container_free(CAST_bitset(c));
+    } else if (type == ARRAY_CONTAINER_TYPE) {
+        array_container_free(CAST_array(c));
+    } else if (type == RUN_CONTAINER_TYPE) {
+        run_container_free(CAST_run(c));
+    } else if (type == SHARED_CONTAINER_TYPE) {
+        shared_container_free(CAST_shared(c));
+    } else {
+        assert(false);
+        __builtin_unreachable();
+    }
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+void run_container_printf(const run_container_t *cont);
+void run_container_printf_as_uint32_array(const run_container_t *cont,
+ uint32_t base);
+
+void container_printf(const container_t *c, uint8_t type) {
+    /* Resolve a possible shared wrapper first, then print with the routine
+     * matching the underlying type. */
+    c = container_unwrap_shared(c, &type);
+    if (type == BITSET_CONTAINER_TYPE) {
+        bitset_container_printf(const_CAST_bitset(c));
+    } else if (type == ARRAY_CONTAINER_TYPE) {
+        array_container_printf(const_CAST_array(c));
+    } else if (type == RUN_CONTAINER_TYPE) {
+        run_container_printf(const_CAST_run(c));
+    } else {
+        __builtin_unreachable();
+    }
+}
+
+void container_printf_as_uint32_array(
+    const container_t *c, uint8_t typecode,
+    uint32_t base
+){
+    /* Resolve a possible shared wrapper first, then forward base to the
+     * per-type printing routine. */
+    c = container_unwrap_shared(c, &typecode);
+    if (typecode == BITSET_CONTAINER_TYPE) {
+        bitset_container_printf_as_uint32_array(const_CAST_bitset(c), base);
+    } else if (typecode == ARRAY_CONTAINER_TYPE) {
+        array_container_printf_as_uint32_array(const_CAST_array(c), base);
+    } else if (typecode == RUN_CONTAINER_TYPE) {
+        run_container_printf_as_uint32_array(const_CAST_run(c), base);
+    } else {
+        __builtin_unreachable();
+    }
+}
+#endif
+
+extern inline bool container_nonzero_cardinality(
+ const container_t *c, uint8_t typecode);
+
+extern inline int container_to_uint32_array(
+ uint32_t *output,
+ const container_t *c, uint8_t typecode,
+ uint32_t base);
+
+extern inline container_t *container_add(
+ container_t *c,
+ uint16_t val,
+ uint8_t typecode, // !!! 2nd arg?
+ uint8_t *new_typecode);
+
+extern inline bool container_contains(
+ const container_t *c,
+ uint16_t val,
+ uint8_t typecode); // !!! 2nd arg?
+
+extern inline container_t *container_and(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_or(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_xor(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+container_t *get_copy_of_container(
+    container_t *c, uint8_t *typecode,
+    bool copy_on_write
+){
+    shared_container_t *wrapper;
+
+    if (!copy_on_write) {
+        // no copy on write: clone whatever sits behind a possible shared wrapper
+        const container_t *inner = container_unwrap_shared(c, typecode);
+        assert(*typecode != SHARED_CONTAINER_TYPE);
+        return container_clone(inner, *typecode);
+    }
+
+    if (*typecode == SHARED_CONTAINER_TYPE) {
+        // already shared: just count one more reference
+        wrapper = CAST_shared(c);
+        wrapper->counter += 1;
+        return wrapper;
+    }
+    assert(*typecode != SHARED_CONTAINER_TYPE);
+
+    // first share: wrap the container instead of copying it
+    wrapper = (shared_container_t *)roaring_malloc(sizeof(shared_container_t));
+    if (wrapper == NULL) {
+        return NULL;
+    }
+    wrapper->container = c;
+    wrapper->typecode = *typecode;
+    // starts at two -- presumably the existing holder plus the returned copy
+    wrapper->counter = 2;
+    *typecode = SHARED_CONTAINER_TYPE;
+    return wrapper;
+}
+
+/**
+ * Copies a container, requires a typecode. This allocates new memory, caller
+ * is responsible for deallocation.
+ *
+ * The typecode selects the bitset, array or run clone routine. Shared
+ * containers are never cloned: NULL is returned for SHARED_CONTAINER_TYPE,
+ * so callers have to unwrap a shared container before calling this.
+ */
+container_t *container_clone(const container_t *c, uint8_t typecode) {
+ // We do not want to allow cloning of shared containers.
+ // c = container_unwrap_shared(c, &typecode);
+ switch (typecode) {
+ case BITSET_CONTAINER_TYPE:
+ return bitset_container_clone(const_CAST_bitset(c));
+ case ARRAY_CONTAINER_TYPE:
+ return array_container_clone(const_CAST_array(c));
+ case RUN_CONTAINER_TYPE:
+ return run_container_clone(const_CAST_run(c));
+ case SHARED_CONTAINER_TYPE:
+ // Shared containers are not cloneable. Are you mixing COW and non-COW bitmaps?
+ return NULL;
+ default:
+ assert(false); // any other typecode is treated as a programming error
+ __builtin_unreachable();
+ return NULL;
+ }
+}
+
+container_t *shared_container_extract_copy(
+    shared_container_t *sc, uint8_t *typecode
+){
+    /* Gives up one reference to sc and returns a container the caller can
+     * use on its own; *typecode receives the wrapped container's type. */
+    container_t *result;
+    assert(sc->counter > 0);
+    assert(sc->typecode != SHARED_CONTAINER_TYPE);
+    sc->counter -= 1;
+    *typecode = sc->typecode;
+    if (sc->counter != 0) {
+        // still referenced elsewhere: hand back a clone
+        result = container_clone(sc->container, *typecode);
+    } else {
+        // last reference: take the wrapped container and drop the wrapper
+        result = sc->container;
+        sc->container = NULL; // paranoid
+        roaring_free(sc);
+    }
+    assert(*typecode != SHARED_CONTAINER_TYPE);
+    return result;
+}
+
+void shared_container_free(shared_container_t *container) {
+    /* Drops one reference; the wrapped container and the wrapper itself are
+     * released only when the count reaches zero. */
+    assert(container->counter > 0);
+    container->counter -= 1;
+    if (container->counter != 0) {
+        return;  // still referenced elsewhere
+    }
+    assert(container->typecode != SHARED_CONTAINER_TYPE);
+    container_free(container->container, container->typecode);
+    container->container = NULL; // paranoid
+    roaring_free(container);
+}
+
+extern inline container_t *container_not(
+ const container_t *c1, uint8_t type1,
+ uint8_t *result_type);
+
+extern inline container_t *container_not_range(
+ const container_t *c1, uint8_t type1,
+ uint32_t range_start, uint32_t range_end,
+ uint8_t *result_type);
+
+extern inline container_t *container_inot(
+ container_t *c1, uint8_t type1,
+ uint8_t *result_type);
+
+extern inline container_t *container_inot_range(
+ container_t *c1, uint8_t type1,
+ uint32_t range_start, uint32_t range_end,
+ uint8_t *result_type);
+
+extern inline container_t *container_range_of_ones(
+ uint32_t range_start, uint32_t range_end,
+ uint8_t *result_type);
+
+// where are the corresponding things for union and intersection??
+extern inline container_t *container_lazy_xor(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_lazy_ixor(
+ container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+extern inline container_t *container_andnot(
+ const container_t *c1, uint8_t type1,
+ const container_t *c2, uint8_t type2,
+ uint8_t *result_type);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/containers.c */
+/* begin file src/containers/convert.c */
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// file contains grubby stuff that must know impl. details of all container
+// types.
+bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
+    /* Builds a new bitset container with one bit set per value of ac.
+     * Returns NULL when bitset_container_create() returns NULL; otherwise
+     * the caller owns the result. */
+    bitset_container_t *ans = bitset_container_create();
+    int limit, i;
+    if (ans == NULL) return NULL;  /* never write through a failed allocation */
+    limit = array_container_cardinality(ac);
+    for (i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
+    return ans;
+}
+
+bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
+    /* Builds a new bitset container covering every run of arr.
+     * Returns NULL when bitset_container_create() returns NULL; otherwise
+     * the caller owns the result. */
+    int card = run_container_cardinality(arr);
+    bitset_container_t *answer = bitset_container_create();
+    int rlepos;
+    if (answer == NULL) return NULL;  /* never write through a failed allocation */
+    for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+        rle16_t vl = arr->runs[rlepos];
+        /* length is passed as-is: the helper takes a start and a length */
+        bitset_set_lenrange(answer->words, vl.value, vl.length);
+    }
+    /* the run container's cardinality is reused rather than recounted */
+    answer->cardinality = card;
+    return answer;
+}
+
+array_container_t *array_container_from_run(const run_container_t *arr) {
+    /* Expands every run of arr into individual values of a new array
+     * container sized to the run container's cardinality.
+     * Returns NULL when the array container cannot be created; otherwise
+     * the caller owns the result. */
+    array_container_t *answer =
+        array_container_create_given_capacity(run_container_cardinality(arr));
+    int rlepos;
+    if (answer == NULL) return NULL;  /* never write through a failed allocation */
+    answer->cardinality = 0;
+    for (rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
+        /* int arithmetic so that a run ending at 65535 cannot wrap around */
+        int run_start = arr->runs[rlepos].value;
+        int run_end = run_start + arr->runs[rlepos].length;
+        int run_value;
+        for (run_value = run_start; run_value <= run_end; ++run_value) {
+            answer->array[answer->cardinality++] = (uint16_t)run_value;
+        }
+    }
+    return answer;
+}
+
+array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
+    /* Creates an array container holding the positions of all set bits.
+     * The capacity and cardinality are taken from bits->cardinality.
+     * Returns NULL when the array container cannot be created; otherwise
+     * the caller owns the result. */
+    array_container_t *result =
+        array_container_create_given_capacity(bits->cardinality);
+    if (result == NULL) return NULL;  /* never write through a failed allocation */
+    result->cardinality = bits->cardinality;
+    // sse version ends up being slower here
+    // (bitset_extract_setbits_sse_uint16)
+    // because of the sparsity of the data
+    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                                  result->array, 0);
+    return result;
+}
+
+/* Appends the run [s,e] (both ends included) after the existing runs.
+ * Assumes that the container has adequate space. */
+static void add_run(run_container_t *rc, int s, int e) {
+    const int slot = rc->n_runs;
+    rc->runs[slot].length = e - s;  /* stored as end minus start */
+    rc->runs[slot].value = s;
+    rc->n_runs = slot + 1;
+}
+
+run_container_t *run_container_from_array(const array_container_t *c) {
+    /* Builds a run container from the array's values, merging consecutive
+     * values into runs. The capacity comes from
+     * array_container_number_of_runs(). */
+    run_container_t *answer =
+        run_container_create_given_capacity(array_container_number_of_runs(c));
+    const int32_t count = c->cardinality;
+    int first_of_run = -1;  /* -1: no run opened yet */
+    int last_seen = -2;     /* sentinel that is never adjacent to a real value */
+    int pos = 0;
+    if (count == 0) return answer;
+    while (pos < count) {
+        const int value = c->array[pos++];
+        if (value != last_seen + 1) {
+            /* gap: close the run in progress (if any) and open a new one */
+            if (first_of_run != -1) add_run(answer, first_of_run, last_seen);
+            first_of_run = value;
+        }
+        last_seen = value;
+    }
+    /* the final run is still open at this point */
+    add_run(answer, first_of_run, last_seen);
+    // assert(run_container_cardinality(answer) == c->cardinality);
+    return answer;
+}
+
+/**
+ * Convert the runcontainer to either a Bitmap or an Array Container, depending
+ * on the cardinality: an array when card <= DEFAULT_MAX_SIZE, a bitset
+ * otherwise. card is supplied by the caller; *resulttype receives the type of
+ * the returned container.
+ * Allocates and returns new container, which caller is responsible for freeing.
+ * It does not free the run container.
+ */
+container_t *convert_to_bitset_or_array_container(
+ run_container_t *rc, int32_t card,
+ uint8_t *resulttype
+){
+ if (card <= DEFAULT_MAX_SIZE) {
+ array_container_t *answer = array_container_create_given_capacity(card);
+ int rlepos;
+ answer->cardinality = 0;
+ for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ uint16_t run_start = rc->runs[rlepos].value;
+ uint16_t run_end = run_start + rc->runs[rlepos].length;
+ uint16_t run_value;
+ for (run_value = run_start; run_value < run_end; // strict '<': a uint16_t counter could never pass a run_end of 65535
+ ++run_value) {
+ answer->array[answer->cardinality++] = run_value;
+ }
+ answer->array[answer->cardinality++] = run_end; // the inclusive end is appended separately
+ }
+ assert(card == answer->cardinality);
+ *resulttype = ARRAY_CONTAINER_TYPE;
+ //run_container_free(r);
+ return answer;
+ }
+ bitset_container_t *answer = bitset_container_create();
+ int rlepos;
+ for (rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
+ uint16_t run_start = rc->runs[rlepos].value;
+ bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
+ }
+ answer->cardinality = card; // trusted from the caller, not recounted
+ *resulttype = BITSET_CONTAINER_TYPE;
+ //run_container_free(r);
+ return answer;
+}
+
+/* Converts a run container to either an array or a bitset, IF it saves space
+ * (serialized sizes are compared; on a tie the run container is kept).
+ * *typecode_after receives the type of the returned container.
+ */
+/* If a conversion occurs, the caller is responsible for freeing the original
+ * container and
+ * also becomes responsible for freeing the new one.
+ * When no conversion occurs, c itself is returned. */
+container_t *convert_run_to_efficient_container(
+ run_container_t *c,
+ uint8_t *typecode_after
+){
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(c->n_runs);
+
+ int32_t size_as_bitset_container =
+ bitset_container_serialized_size_in_bytes();
+ int32_t card = run_container_cardinality(c);
+ int32_t size_as_array_container =
+ array_container_serialized_size_in_bytes(card);
+
+ int32_t min_size_non_run =
+ size_as_bitset_container < size_as_array_container
+ ? size_as_bitset_container
+ : size_as_array_container;
+ if (size_as_run_container <= min_size_non_run) { // no conversion
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return c;
+ }
+ if (card <= DEFAULT_MAX_SIZE) {
+ // to array
+ array_container_t *answer = array_container_create_given_capacity(card);
+ answer->cardinality = 0;
+ int rlepos;
+ for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+ int run_start = c->runs[rlepos].value;
+ int run_end = run_start + c->runs[rlepos].length; // inclusive end, held in an int
+ int run_value;
+ for (run_value = run_start; run_value <= run_end; ++run_value) {
+ answer->array[answer->cardinality++] = (uint16_t)run_value;
+ }
+ }
+ *typecode_after = ARRAY_CONTAINER_TYPE;
+ return answer;
+ }
+
+ // else to bitset
+ bitset_container_t *answer = bitset_container_create();
+ int rlepos;
+ for (rlepos = 0; rlepos < c->n_runs; ++rlepos) {
+ int start = c->runs[rlepos].value;
+ int end = start + c->runs[rlepos].length;
+ bitset_set_range(answer->words, start, end + 1); // end is inclusive, hence end + 1 here
+ }
+ answer->cardinality = card;
+ *typecode_after = BITSET_CONTAINER_TYPE;
+ return answer;
+}
+
+// like convert_run_to_efficient_container, but the run container is freed
+// whenever a different container is returned
+container_t *convert_run_to_efficient_container_and_free(
+    run_container_t *c,
+    uint8_t *typecode_after
+){
+    container_t *converted =
+        convert_run_to_efficient_container(c, typecode_after);
+    if (converted == c) {
+        return converted;  // no conversion took place, nothing to release
+    }
+    run_container_free(c);
+    return converted;
+}
+
+/* once converted, the original container is disposed here, rather than
+ in roaring_array
+*/
+
+// TODO: split into run- array- and bitset- subfunctions for sanity;
+// a few function calls won't really matter.
+
+container_t *convert_run_optimize( // Returns c, or a replacement container of another type when that serializes smaller; a replaced c is freed here.
+ container_t *c, uint8_t typecode_original,
+ uint8_t *typecode_after
+){
+ if (typecode_original == RUN_CONTAINER_TYPE) {
+ container_t *newc = convert_run_to_efficient_container(
+ CAST_run(c), typecode_after);
+ if (newc != c) { // a conversion happened: the old run container is no longer needed
+ container_free(c, typecode_original);
+ }
+ return newc;
+ } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
+ // it might need to be converted to a run container.
+ array_container_t *c_qua_array = CAST_array(c);
+ int32_t n_runs = array_container_number_of_runs(c_qua_array);
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(n_runs);
+ int32_t card = array_container_cardinality(c_qua_array);
+ int32_t size_as_array_container =
+ array_container_serialized_size_in_bytes(card);
+
+ if (size_as_run_container >= size_as_array_container) { // on a tie the array is kept
+ *typecode_after = ARRAY_CONTAINER_TYPE;
+ return c;
+ }
+ // else convert array to run container
+ run_container_t *answer = run_container_create_given_capacity(n_runs);
+ int prev = -2; // sentinel that is never adjacent to a real value
+ int run_start = -1; // -1 means no run has been opened yet
+ int i;
+
+ assert(card > 0);
+
+ for (i = 0; i < card; ++i) {
+ uint16_t cur_val = c_qua_array->array[i];
+ if (cur_val != prev + 1) {
+ // new run starts; flush old one, if any
+ if (run_start != -1) add_run(answer, run_start, prev);
+ run_start = cur_val;
+ }
+ prev = c_qua_array->array[i];
+ }
+ assert(run_start >= 0);
+ // now prev is the last seen value
+ add_run(answer, run_start, prev);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ array_container_free(c_qua_array);
+ return answer;
+ } else if (typecode_original ==
+ BITSET_CONTAINER_TYPE) { // run conversions on bitset
+ // does bitset need conversion to run?
+ bitset_container_t *c_qua_bitset = CAST_bitset(c);
+ int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
+ int32_t size_as_run_container =
+ run_container_serialized_size_in_bytes(n_runs);
+ int32_t size_as_bitset_container =
+ bitset_container_serialized_size_in_bytes();
+
+ if (size_as_bitset_container <= size_as_run_container) {
+ // no conversion needed.
+ *typecode_after = BITSET_CONTAINER_TYPE;
+ return c;
+ }
+ // bitset to runcontainer (ported from Java RunContainer(
+ // BitmapContainer bc, int nbrRuns))
+ assert(n_runs > 0); // no empty bitmaps
+ run_container_t *answer = run_container_create_given_capacity(n_runs);
+
+ int long_ctr = 0; // index of the word currently being examined
+ uint64_t cur_word = c_qua_bitset->words[0];
+ while (true) {
+ while (cur_word == UINT64_C(0) &&
+ long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
+ cur_word = c_qua_bitset->words[++long_ctr]; // skip words with no bits left
+
+ if (cur_word == UINT64_C(0)) { // ran out of words: every run has been emitted
+ bitset_container_free(c_qua_bitset);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return answer;
+ }
+
+ int local_run_start = __builtin_ctzll(cur_word);
+ int run_start = local_run_start + 64 * long_ctr;
+ uint64_t cur_word_with_1s = cur_word | (cur_word - 1); // fill the bits below the run start with ones
+
+ int run_end = 0;
+ while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
+ long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
+ cur_word_with_1s = c_qua_bitset->words[++long_ctr]; // the run continues into the next word
+
+ if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { // the run reaches the end of the last word
+ run_end = 64 + long_ctr * 64; // exclusive, I guess
+ add_run(answer, run_start, run_end - 1);
+ bitset_container_free(c_qua_bitset);
+ *typecode_after = RUN_CONTAINER_TYPE;
+ return answer;
+ }
+ int local_run_end = __builtin_ctzll(~cur_word_with_1s); // first clear bit after the run
+ run_end = local_run_end + long_ctr * 64;
+ add_run(answer, run_start, run_end - 1);
+ cur_word = cur_word_with_1s & (cur_word_with_1s + 1); // clear the run just emitted, keep the higher bits
+ }
+ return answer;
+ } else {
+ assert(false);
+ __builtin_unreachable();
+ return NULL;
+ }
+}
+
+container_t *container_from_run_range(
+    const run_container_t *run,
+    uint32_t min, uint32_t max, uint8_t *typecode_after
+){
+    /* Builds a new container holding the values of `run` together with the
+     * interval from min to max (counted as max - min + 1 values). The result
+     * is a bitset unless its cardinality is at most DEFAULT_MAX_SIZE, in
+     * which case an array is returned. */
+    // We expect most of the time to end up with a bitset container
+    bitset_container_t *acc = bitset_container_create();
+    const uint32_t span = max - min;
+    int32_t total = 0;
+    int32_t r;
+    *typecode_after = BITSET_CONTAINER_TYPE;
+    for (r = 0; r < run->n_runs; ++r) {
+        const uint32_t first = run->runs[r].value;
+        const uint32_t extra = run->runs[r].length;
+        bitset_set_lenrange(acc->words, first, extra);
+        total += extra + 1;
+    }
+    /* add the interval, minus the part of it that the runs already cover */
+    total += span + 1;
+    total -= bitset_lenrange_cardinality(acc->words, min, span);
+    bitset_set_lenrange(acc->words, min, span);
+    acc->cardinality = total;
+    if (acc->cardinality > DEFAULT_MAX_SIZE) {
+        return acc;
+    }
+    // we need to convert to an array container
+    {
+        array_container_t *as_array = array_container_from_bitset(acc);
+        *typecode_after = ARRAY_CONTAINER_TYPE;
+        bitset_container_free(acc);
+        return as_array;
+    }
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/convert.c */
+/* begin file src/containers/mixed_andnot.c */
+/*
+ * mixed_andnot.c. More methods since operation is not symmetric,
+ * except no "wide" andnot , so no lazy options motivated.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Store in dst every value of src_1 that is not contained in src_2
+ * (src_1 AND NOT src_2). dst must be a valid array container and is allowed
+ * to be src_1 itself. */
+void array_bitset_container_andnot(const array_container_t *src_1,
+                                   const bitset_container_t *src_2,
+                                   array_container_t *dst) {
+    int32_t kept = 0;
+    int pos;
+
+    // follows Java implementation as of June 2016
+    if (src_1->cardinality > dst->capacity) {
+        array_container_grow(dst, src_1->cardinality, false);
+    }
+
+    {
+        const int32_t total = src_1->cardinality;
+        for (pos = 0; pos < total; ++pos) {
+            const uint16_t candidate = src_1->array[pos];
+            // Always write, then advance only when the value survives; the
+            // next iteration overwrites a slot that was not kept.
+            dst->array[kept] = candidate;
+            kept += 1 - bitset_container_contains(src_2, candidate);
+        }
+    }
+    dst->cardinality = kept;
+}
+
+/* In-place andnot: compute the andnot of src_1 and src_2 and write the
+ * result back into src_1. Implemented by calling
+ * array_bitset_container_andnot with src_1 as both input and destination. */
+
+void array_bitset_container_iandnot(array_container_t *src_1,
+ const bitset_container_t *src_2) {
+ array_bitset_container_andnot(src_1, src_2, src_1);
+}
+
+/* Build a new container holding src_1 AND NOT src_2 and return it through
+ * *dst, which does not need to reference a valid container beforehand.
+ * The return value is true when *dst is a bitset container and false when
+ * it is an array container.
+ */
+
+bool bitset_array_container_andnot(
+    const bitset_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    // Java did this directly, but we have option of asm or avx
+    bitset_container_t *scratch = bitset_container_create();
+
+    // Work on a private copy of src_1 and clear the listed values from it.
+    bitset_container_copy(src_1, scratch);
+    scratch->cardinality = (int32_t)bitset_clear_list(
+        scratch->words, (uint64_t)scratch->cardinality,
+        src_2->array, (uint64_t)src_2->cardinality);
+
+    if (scratch->cardinality > DEFAULT_MAX_SIZE) {
+        *dst = scratch;
+        return true;
+    }
+
+    // Too sparse for a bitset: convert to an array and drop the copy.
+    *dst = array_container_from_bitset(scratch);
+    bitset_container_free(scratch);
+    return false;
+}
+
+/* In-place flavour of the bitset/array andnot. The values listed in src_2
+ * are cleared directly inside src_1.
+ *  - If more than DEFAULT_MAX_SIZE values remain, *dst is src_1 and the
+ *    function returns true (bitset result).
+ *  - Otherwise a new array container is stored in *dst, src_1 is freed and
+ *    the function returns false.
+ * Either way the caller owns *dst afterwards. */
+
+bool bitset_array_container_iandnot(
+    bitset_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    int32_t remaining;
+
+    *dst = src_1;
+    remaining = (int32_t)bitset_clear_list(
+        src_1->words, (uint64_t)src_1->cardinality,
+        src_2->array, (uint64_t)src_2->cardinality);
+    src_1->cardinality = remaining;
+
+    if (src_1->cardinality > DEFAULT_MAX_SIZE) {
+        return true;
+    }
+
+    *dst = array_container_from_bitset(src_1);
+    bitset_container_free(src_1);
+    return false;  // not bitset
+}
+
+/* Compute the andnot of src_1 and src_2 (the values covered by the runs of
+ * src_1 that are not set in the bitset src_2) and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container (return result false).
+ * Neither src_1 nor src_2 is modified or freed here.
+ */
+
+bool run_bitset_container_andnot(
+ const run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst
+){
+ // follows the Java implementation as of June 2016
+ int card = run_container_cardinality(src_1);
+ if (card <= DEFAULT_MAX_SIZE) {
+ // must be an array
+ array_container_t *answer = array_container_create_given_capacity(card);
+ answer->cardinality = 0;
+ int32_t rlepos;
+ for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+ int run_value; // int, so that value + length can be reached without wrapping
+ for (run_value = rle.value; run_value <= rle.value + rle.length;
+ ++run_value) {
+ if (!bitset_container_get(src_2, (uint16_t)run_value)) {
+ answer->array[answer->cardinality++] = (uint16_t)run_value;
+ }
+ }
+ }
+ *dst = answer;
+ return false;
+ } else { // we guess it will be a bitset, though have to check guess when
+ // done
+ bitset_container_t *answer = bitset_container_clone(src_2); // start from a copy of src_2
+
+ uint32_t last_pos = 0; // first position not yet processed
+ int32_t rlepos;
+ for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+
+ uint32_t start = rle.value;
+ uint32_t end = start + rle.length + 1; // one past the last value of the run
+ bitset_reset_range(answer->words, last_pos, start); // gap before the run: not in src_1, clear it
+ bitset_flip_range(answer->words, start, end); // inside the run: keep exactly the bits src_2 lacks
+ last_pos = end;
+ }
+ bitset_reset_range(answer->words, last_pos, (uint32_t)(1 << 16)); // clear everything after the last run
+
+ answer->cardinality = bitset_container_compute_cardinality(answer);
+
+ if (answer->cardinality <= DEFAULT_MAX_SIZE) {
+ *dst = array_container_from_bitset(answer);
+ bitset_container_free(answer);
+ return false; // not bitset
+ }
+ *dst = answer;
+ return true; // bitset
+ }
+}
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ * This is not a true in-place operation: the result is built by
+ * run_bitset_container_andnot and src_1 is then freed with
+ * run_container_free, so src_1 must not be used after the call.
+ */
+
+bool run_bitset_container_iandnot(
+ run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst
+){
+ // dummy implementation
+ bool ans = run_bitset_container_andnot(src_1, src_2, dst);
+ run_container_free(src_1);
+ return ans;
+}
+
+/* Build a new container holding the bits of src_1 that are not covered by
+ * any run of src_2, and return it through *dst (no container is expected in
+ * *dst beforehand). Returns true when *dst is a bitset container and false
+ * when it is an array container.
+ */
+
+bool bitset_run_container_andnot(
+    const bitset_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    // follows Java implementation
+    bitset_container_t *scratch = bitset_container_create();
+    int32_t k;
+
+    bitset_container_copy(src_1, scratch);
+
+    // Wipe every run of src_2 out of the copy.
+    for (k = 0; k < src_2->n_runs; ++k) {
+        const rle16_t run = src_2->runs[k];
+        bitset_reset_range(scratch->words, run.value,
+                           run.value + run.length + UINT32_C(1));
+    }
+    scratch->cardinality = bitset_container_compute_cardinality(scratch);
+
+    if (scratch->cardinality > DEFAULT_MAX_SIZE) {
+        *dst = scratch;
+        return true;  // bitset
+    }
+
+    *dst = array_container_from_bitset(scratch);
+    bitset_container_free(scratch);
+    return false;  // not bitset
+}
+
+/* In-place flavour of the bitset/run andnot. Every run of src_2 is cleared
+ * directly inside src_1.
+ *  - If more than DEFAULT_MAX_SIZE values remain, *dst is src_1 and the
+ *    function returns true (bitset result).
+ *  - Otherwise a new array container is stored in *dst, src_1 is freed and
+ *    the function returns false.
+ * Either way the caller owns *dst afterwards. */
+
+bool bitset_run_container_iandnot(
+    bitset_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    int32_t k = 0;
+
+    *dst = src_1;
+
+    while (k < src_2->n_runs) {
+        const rle16_t run = src_2->runs[k];
+        bitset_reset_range(src_1->words, run.value,
+                           run.value + run.length + UINT32_C(1));
+        ++k;
+    }
+    src_1->cardinality = bitset_container_compute_cardinality(src_1);
+
+    if (src_1->cardinality > DEFAULT_MAX_SIZE) {
+        return true;
+    }
+
+    *dst = array_container_from_bitset(src_1);
+    bitset_container_free(src_1);
+    return false;  // not bitset
+}
+
+/* helper. Writes into a_out the values covered by the runs of rc that do
+ * not appear in a_in. a_out must be a valid array container with adequate
+ * capacity (this function never grows it and does not set
+ * a_out->cardinality; the caller stores the returned count).
+ * Returns the cardinality of the output container. Partly Based on Java
+ * implementation Util.unsignedDifference.
+ *
+ * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
+ * to avoid advanceUntil?
+ */
+
+static int run_array_array_subtract(const run_container_t *rc,
+ const array_container_t *a_in,
+ array_container_t *a_out) {
+ int out_card = 0;
+ int32_t in_array_pos =
+ -1; // since advanceUntil always assumes we start the search AFTER this
+
+ int rlepos; for (rlepos = 0; rlepos < rc->n_runs; rlepos++) {
+ int32_t start = rc->runs[rlepos].value;
+ int32_t end = start + rc->runs[rlepos].length + 1; // one past the last value of the run
+
+ in_array_pos = advanceUntil(a_in->array, in_array_pos,
+ a_in->cardinality, (uint16_t)start);
+
+ if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
+ int32_t i; for (i = start; i < end; ++i)
+ a_out->array[out_card++] = (uint16_t)i;
+ } else {
+ uint16_t next_nonincluded = a_in->array[in_array_pos]; // next value of a_in to leave out
+ if (next_nonincluded >= end) {
+ // another case when run goes unaltered
+ int32_t i; for (i = start; i < end; ++i)
+ a_out->array[out_card++] = (uint16_t)i;
+ in_array_pos--; // ensure we see this item again if necessary
+ } else {
+ int32_t i; for (i = start; i < end; ++i)
+ if (i != next_nonincluded)
+ a_out->array[out_card++] = (uint16_t)i;
+ else // 0 should ensure we don't match
+ next_nonincluded =
+ (in_array_pos + 1 >= a_in->cardinality)
+ ? 0
+ : a_in->array[++in_array_pos];
+ in_array_pos--; // see again
+ }
+ }
+ }
+ return out_card;
+}
+
+/* Compute the andnot of src_1 (runs) and src_2 (array) into a new container.
+ * dst does not indicate a valid container initially. Eventually it
+ * can become any type of container.
+ * Returns the type code of *dst (RUN_CONTAINER_TYPE, ARRAY_CONTAINER_TYPE,
+ * BITSET_CONTAINER_TYPE, or whatever convert_run_to_efficient_container
+ * reports). Three strategies are used depending on the cardinality of src_1:
+ * a run-based merge (card <= 32), an array subtraction
+ * (card <= DEFAULT_MAX_SIZE), and a bitset conversion otherwise.
+ * NOTE(review): the run-based path reads src_1->runs[0] and
+ * src_2->array[0] before the loop; src_2 is known non-empty there, but
+ * src_1 is presumably assumed to have at least one run - confirm.
+ */
+
+int run_array_container_andnot(
+ const run_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ // follows the Java impl as of June 2016
+
+ int card = run_container_cardinality(src_1);
+ const int arbitrary_threshold = 32;
+
+ if (card <= arbitrary_threshold) {
+ if (src_2->cardinality == 0) {
+ *dst = run_container_clone(src_1); // nothing to remove
+ return RUN_CONTAINER_TYPE;
+ }
+ // Java's "lazyandNot.toEfficientContainer" thing
+ run_container_t *answer = run_container_create_given_capacity(
+ card + array_container_cardinality(src_2));
+
+ int rlepos = 0;
+ int xrlepos = 0; // "x" is src_2
+ rle16_t rle = src_1->runs[rlepos];
+ int32_t start = rle.value; // start of the not-yet-emitted part of the current run
+ int32_t end = start + rle.length + 1; // one past the end of the current run
+ int32_t xstart = src_2->array[xrlepos]; // current value to subtract
+
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
+ if (end <= xstart) {
+ // output the first run
+ answer->runs[answer->n_runs++] =
+ MAKE_RLE16(start, end - start - 1);
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ } else if (xstart + 1 <= start) {
+ // exit the second run
+ xrlepos++;
+ if (xrlepos < src_2->cardinality) {
+ xstart = src_2->array[xrlepos];
+ }
+ } else {
+ if (start < xstart) { // emit the piece of the run that precedes xstart
+ answer->runs[answer->n_runs++] =
+ MAKE_RLE16(start, xstart - start - 1);
+ }
+ if (xstart + 1 < end) {
+ start = xstart + 1; // continue with the remainder of the same run
+ } else {
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ }
+ }
+ }
+ if (rlepos < src_1->n_runs) { // src_2 exhausted: flush the pending run, then copy the rest
+ answer->runs[answer->n_runs++] = MAKE_RLE16(start, end - start - 1);
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
+ (src_1->n_runs - rlepos) * sizeof(rle16_t));
+ answer->n_runs += (src_1->n_runs - rlepos);
+ }
+ }
+ uint8_t return_type;
+ *dst = convert_run_to_efficient_container(answer, &return_type);
+ if (answer != *dst) run_container_free(answer); // free only if a different container was returned
+ return return_type;
+ }
+ // else it's a bitmap or array
+
+ if (card <= DEFAULT_MAX_SIZE) {
+ array_container_t *ac = array_container_create_given_capacity(card);
+ // nb Java code used a generic iterator-based merge to compute
+ // difference
+ ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
+ *dst = ac;
+ return ARRAY_CONTAINER_TYPE;
+ }
+ bitset_container_t *ans = bitset_container_from_run(src_1);
+ bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); // ans becomes *dst or is freed by the callee
+ return (result_is_bitset ? BITSET_CONTAINER_TYPE
+ : ARRAY_CONTAINER_TYPE);
+}
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). This is not a true in-place
+ * operation: the result is built by run_array_container_andnot and
+ * src_1 is then freed with run_container_free, so src_1 must not be
+ * used after the call. The caller is responsible for deallocating dst.
+ * Returns the value returned by run_array_container_andnot, i.e. the
+ * container type code of *dst (not a boolean). */
+
+int run_array_container_iandnot(
+ run_container_t *src_1, const array_container_t *src_2,
+ container_t **dst
+){
+ // dummy implementation same as June 2016 Java
+ int ans = run_array_container_andnot(src_1, src_2, dst);
+ run_container_free(src_1);
+ return ans;
+}
+
+/* Write to dst the values of the array src_1 that are not covered by any
+ * run of src_2. dst must be a valid array container, allowed to be src_1;
+ * it is grown when its capacity is below src_1->cardinality. */
+
+void array_run_container_andnot(const array_container_t *src_1,
+ const run_container_t *src_2,
+ array_container_t *dst) {
+ // basically following Java impl as of June 2016
+ if (src_1->cardinality > dst->capacity) {
+ array_container_grow(dst, src_1->cardinality, false);
+ }
+
+ if (src_2->n_runs == 0) { // nothing to subtract: plain copy (memmove since dst may be src_1)
+ memmove(dst->array, src_1->array,
+ sizeof(uint16_t) * src_1->cardinality);
+ dst->cardinality = src_1->cardinality;
+ return;
+ }
+ int32_t run_start = src_2->runs[0].value;
+ int32_t run_end = run_start + src_2->runs[0].length; // inclusive end of the current run
+ int which_run = 0;
+
+ uint16_t val = 0;
+ int dest_card = 0;
+ int i; for (i = 0; i < src_1->cardinality; ++i) {
+ val = src_1->array[i];
+ if (val < run_start)
+ dst->array[dest_card++] = val;
+ else if (val <= run_end) {
+ ; // omitted item
+ } else {
+ do { // advance to the first run that does not end before val
+ if (which_run + 1 < src_2->n_runs) {
+ ++which_run;
+ run_start = src_2->runs[which_run].value;
+ run_end = run_start + src_2->runs[which_run].length;
+
+ } else
+ run_start = run_end = (1 << 16) + 1; // no runs left: sentinel above every uint16_t value
+ } while (val > run_end);
+ --i; // re-examine the same value against the new run
+ }
+ }
+ dst->cardinality = dest_card;
+}
+
+/* In-place andnot: src_1 is overwritten with the andnot of src_1 and
+ * src_2, by calling array_run_container_andnot with src_1 as the
+ * destination. The result stays an array container.
+ */
+
+void array_run_container_iandnot(array_container_t *src_1,
+ const run_container_t *src_2) {
+ array_run_container_andnot(src_1, src_2, src_1);
+}
+
+/* Compute the andnot of two run containers.
+ * dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ * The andnot is first computed into a temporary run container, which is
+ * then passed to convert_run_to_efficient_container_and_free; the type
+ * code reported by that call is the return value.
+ */
+
+int run_run_container_andnot(
+ const run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst
+){
+ run_container_t *ans = run_container_create();
+ run_container_andnot(src_1, src_2, ans);
+ uint8_t typecode_after;
+ *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
+ return typecode_after;
+}
+
+/* Compute the andnot of src_1 and src_2 and write the result to
+ * dst (which has no container initially). This is not a true in-place
+ * operation: the result is built by run_run_container_andnot and
+ * src_1 is then freed with run_container_free, so src_1 must not be
+ * used after the call. The caller is responsible for deallocating dst.
+ * Returns the value returned by run_run_container_andnot, i.e. the
+ * container type code of *dst (not a boolean). */
+
+int run_run_container_iandnot(
+ run_container_t *src_1, const run_container_t *src_2,
+ container_t **dst
+){
+ // following Java impl as of June 2016 (dummy)
+ int ans = run_run_container_andnot(src_1, src_2, dst);
+ run_container_free(src_1);
+ return ans;
+}
+
+/*
+ * Compute the andnot of two array containers by forwarding to
+ * array_container_andnot.
+ * dst is a valid array container and may be the same as src_1
+ */
+
+void array_array_container_andnot(const array_container_t *src_1,
+ const array_container_t *src_2,
+ array_container_t *dst) {
+ array_container_andnot(src_1, src_2, dst);
+}
+
+/* inplace array-array andnot will always be able to reuse the space of
+ * src_1: array_container_andnot is called with src_1 as both the first
+ * operand and the destination. */
+void array_array_container_iandnot(array_container_t *src_1,
+ const array_container_t *src_2) {
+ array_container_andnot(src_1, src_2, src_1);
+}
+
+/* Build a new container holding src_1 AND NOT src_2 and return it through
+ * *dst (which has no container initially). The return value tells whether
+ * *dst is a bitset container (true) or an array container (false).
+ */
+
+bool bitset_bitset_container_andnot(
+    const bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    bitset_container_t *scratch = bitset_container_create();
+    const int remaining = bitset_container_andnot(src_1, src_2, scratch);
+
+    if (remaining > DEFAULT_MAX_SIZE) {
+        *dst = scratch;
+        return true;
+    }
+
+    // Sparse result: keep an array and discard the temporary bitset.
+    *dst = array_container_from_bitset(scratch);
+    bitset_container_free(scratch);
+    return false;  // not bitset
+}
+
+/* In-place flavour of the bitset/bitset andnot: src_1 is overwritten with
+ * src_1 AND NOT src_2.
+ *  - If more than DEFAULT_MAX_SIZE values remain, *dst is src_1 and the
+ *    function returns true (bitset result).
+ *  - Otherwise a new array container is stored in *dst, src_1 is freed and
+ *    the function returns false.
+ * Either way the caller owns *dst afterwards. */
+
+bool bitset_bitset_container_iandnot(
+    bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    const int remaining = bitset_container_andnot(src_1, src_2, src_1);
+
+    if (remaining > DEFAULT_MAX_SIZE) {
+        *dst = src_1;
+        return true;
+    }
+
+    *dst = array_container_from_bitset(src_1);
+    bitset_container_free(src_1);
+    return false;  // not bitset
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_andnot.c */
+/* begin file src/containers/mixed_equal.c */
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Return true when the array container and the bitset container hold
+ * exactly the same values. The set bits of container2 are walked in
+ * increasing order and compared one by one with container1->array. */
+bool array_container_equal_bitset(const array_container_t* container1,
+                                  const bitset_container_t* container2) {
+    int32_t matched = 0;
+    int32_t word_idx;
+
+    // Cheap rejection when the bitset's cardinality is known.
+    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY &&
+        container2->cardinality != container1->cardinality) {
+        return false;
+    }
+
+    for (word_idx = 0; word_idx < BITSET_CONTAINER_SIZE_IN_WORDS; ++word_idx) {
+        uint64_t bits = container2->words[word_idx];
+        // bits &= bits - 1 drops the lowest set bit on every pass
+        for (; bits != 0; bits &= bits - 1) {
+            const uint16_t value = word_idx * 64 + __builtin_ctzll(bits);
+            if (matched >= container1->cardinality ||
+                container1->array[matched] != value) {
+                return false;
+            }
+            ++matched;
+        }
+    }
+    return matched == container1->cardinality;
+}
+
+/* Return true when the run container and the array container hold the same
+ * values. After the cardinalities are found equal, only the first and last
+ * value of each run are compared with the array entries at the matching
+ * positions. */
+bool run_container_equals_array(const run_container_t* container1,
+                                const array_container_t* container2) {
+    int32_t cursor = 0;
+    int r;
+
+    if (run_container_cardinality(container1) != container2->cardinality) {
+        return false;
+    }
+
+    for (r = 0; r < container1->n_runs; ++r) {
+        const uint32_t first = container1->runs[r].value;
+        const uint32_t span = container1->runs[r].length;
+
+        if (container2->array[cursor] != first ||
+            container2->array[cursor + span] != first + span) {
+            return false;
+        }
+        cursor += span + 1;
+    }
+    return true;
+}
+
+/* Return true when the run container and the bitset container hold the
+ * same values: the cardinalities must match and every run must be fully
+ * contained in the bitset. */
+bool run_container_equals_bitset(const run_container_t* container1,
+                                 const bitset_container_t* container2) {
+    const int run_card = run_container_cardinality(container1);
+    int bitset_card;
+    int32_t r;
+
+    // Use the cached cardinality when available, otherwise count the bits.
+    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+        bitset_card = container2->cardinality;
+    } else {
+        bitset_card = bitset_container_compute_cardinality(container2);
+    }
+    if (run_card != bitset_card) {
+        return false;
+    }
+
+    for (r = 0; r < container1->n_runs; r++) {
+        const uint32_t begin = container1->runs[r].value;
+        if (container1->runs[r].length == 0) {
+            // single-value run
+            if (!bitset_container_contains(container2, begin)) {
+                return false;
+            }
+        } else {
+            const uint32_t end = begin + container1->runs[r].length + 1;
+            if (!bitset_container_contains_range(container2, begin, end)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_equal.c */
+/* begin file src/containers/mixed_intersection.c */
+/*
+ * mixed_intersection.c
+ *
+ */
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Write to dst the values of the array src_1 that are also set in the
+ * bitset src_2. dst is grown when its capacity is below
+ * src_1->cardinality; dst could be src_1. */
+void array_bitset_container_intersection(const array_container_t *src_1,
+                                         const bitset_container_t *src_2,
+                                         array_container_t *dst) {
+    int32_t kept = 0;
+    int pos;
+
+    if (src_1->cardinality > dst->capacity) {
+        array_container_grow(dst, src_1->cardinality, false);
+    }
+
+    {
+        const int32_t total = src_1->cardinality;
+        for (pos = 0; pos < total; ++pos) {
+            const uint16_t candidate = src_1->array[pos];
+            /* Branchless on purpose: the value is always stored and the
+             * write position only advances on a hit. A conditional store
+             * mispredicts heavily on unpredictable data. See
+             * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c
+             */
+            dst->array[kept] = candidate;
+            kept += bitset_container_contains(src_2, candidate);
+        }
+    }
+    dst->cardinality = kept;
+}
+
+/* Count how many values of the array src_1 are also set in the bitset
+ * src_2, without materializing the intersection. */
+int array_bitset_container_intersection_cardinality(
+    const array_container_t *src_1, const bitset_container_t *src_2) {
+    const int32_t total = src_1->cardinality;
+    int32_t hits = 0;
+    int pos = 0;
+
+    while (pos < total) {
+        hits += bitset_container_contains(src_2, src_1->array[pos]);
+        ++pos;
+    }
+    return hits;
+}
+
+
+/* Return true as soon as one value of the array src_1 is found in the
+ * bitset src_2, false when they share no value. */
+bool array_bitset_container_intersect(const array_container_t *src_1,
+                                      const bitset_container_t *src_2) {
+    const int32_t total = src_1->cardinality;
+    int pos = 0;
+
+    while (pos < total) {
+        const uint16_t candidate = src_1->array[pos];
+        if (bitset_container_contains(src_2, candidate)) {
+            return true;
+        }
+        ++pos;
+    }
+    return false;
+}
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
+ * valid container.
+ *
+ * Special cases:
+ *  - src_2 is full: the result is src_1 (copied unless dst is src_1).
+ *  - src_2 has no runs: the result is empty, so dst->cardinality is set
+ *    to 0. Returning without touching dst would leave stale contents
+ *    behind, in particular all of src_1 when the call is in place. */
+void array_run_container_intersection(const array_container_t *src_1,
+                                      const run_container_t *src_2,
+                                      array_container_t *dst) {
+    if (run_container_is_full(src_2)) {
+        if (dst != src_1) array_container_copy(src_1, dst);
+        return;
+    }
+    if (dst->capacity < src_1->cardinality) {
+        array_container_grow(dst, src_1->cardinality, false);
+    }
+    if (src_2->n_runs == 0) {
+        // intersecting with the empty set yields the empty set
+        dst->cardinality = 0;
+        return;
+    }
+    int32_t rlepos = 0;
+    int32_t arraypos = 0;
+    rle16_t rle = src_2->runs[rlepos];
+    int32_t newcard = 0;
+    while (arraypos < src_1->cardinality) {
+        const uint16_t arrayval = src_1->array[arraypos];
+        // skip the runs that end before the current array value
+        while (rle.value + rle.length <
+               arrayval) {  // this will frequently be false
+            ++rlepos;
+            if (rlepos == src_2->n_runs) {
+                dst->cardinality = newcard;
+                return;  // we are done
+            }
+            rle = src_2->runs[rlepos];
+        }
+        if (rle.value > arrayval) {
+            // the value lies in the gap before the run: jump ahead in src_1
+            arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
+                                    rle.value);
+        } else {
+            // the value lies inside the run: keep it
+            dst->array[newcard] = arrayval;
+            newcard++;
+            arraypos++;
+        }
+    }
+    dst->cardinality = newcard;
+}
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * *dst. If the result is true then the result is a bitset_container_t
+ * otherwise is a array_container_t. If *dst == src_2, an in-place processing
+ * is attempted (src_2->words is then modified despite the const qualifier).
+ * *dst may be left NULL when an allocation fails.
+ * NOTE(review): in the in-place path that ends with an array result,
+ * src_2 is not freed here - presumably the caller still owns it; confirm.
+ * NOTE(review): the array path clamps card to src_2->cardinality, which
+ * presumably assumes that cardinality is known (not
+ * BITSET_UNKNOWN_CARDINALITY) - confirm against callers. */
+bool run_bitset_container_intersection(
+ const run_container_t *src_1, const bitset_container_t *src_2,
+ container_t **dst
+){
+ if (run_container_is_full(src_1)) { // full run container: the result is src_2 itself
+ if (*dst != src_2) *dst = bitset_container_clone(src_2);
+ return true;
+ }
+ int32_t card = run_container_cardinality(src_1);
+ if (card <= DEFAULT_MAX_SIZE) {
+ // result can only be an array (assuming that we never make a
+ // RunContainer)
+ if (card > src_2->cardinality) {
+ card = src_2->cardinality;
+ }
+ array_container_t *answer = array_container_create_given_capacity(card);
+ *dst = answer;
+ if (*dst == NULL) {
+ return false;
+ }
+ int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ rle16_t rle = src_1->runs[rlepos];
+ uint32_t endofrun = (uint32_t)rle.value + rle.length;
+ uint32_t runValue; for (runValue = rle.value; runValue <= endofrun;
+ ++runValue) {
+ answer->array[answer->cardinality] = (uint16_t)runValue; // branchless: always store, advance only on a hit
+ answer->cardinality +=
+ bitset_container_contains(src_2, runValue);
+ }
+ }
+ return false;
+ }
+ if (*dst == src_2) { // we attempt in-place
+ bitset_container_t *answer = CAST_bitset(*dst);
+ uint32_t start = 0; // start of the current gap between runs
+ int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ const rle16_t rle = src_1->runs[rlepos];
+ uint32_t end = rle.value;
+ bitset_reset_range(src_2->words, start, end); // clear the gap before this run
+
+ start = end + rle.length + 1;
+ }
+ bitset_reset_range(src_2->words, start, UINT32_C(1) << 16); // clear everything after the last run
+ answer->cardinality = bitset_container_compute_cardinality(answer);
+ if (src_2->cardinality > DEFAULT_MAX_SIZE) {
+ return true;
+ } else {
+ array_container_t *newanswer = array_container_from_bitset(src_2);
+ if (newanswer == NULL) {
+ *dst = NULL;
+ return false;
+ }
+ *dst = newanswer;
+ return false;
+ }
+ } else { // no inplace
+ // we expect the answer to be a bitmap (if we are lucky)
+ bitset_container_t *answer = bitset_container_clone(src_2);
+
+ *dst = answer;
+ if (answer == NULL) {
+ return true;
+ }
+ uint32_t start = 0; // start of the current gap between runs
+ int32_t rlepos; for (rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
+ const rle16_t rle = src_1->runs[rlepos];
+ uint32_t end = rle.value;
+ bitset_reset_range(answer->words, start, end); // clear the gap before this run
+ start = end + rle.length + 1;
+ }
+ bitset_reset_range(answer->words, start, UINT32_C(1) << 16); // clear everything after the last run
+ answer->cardinality = bitset_container_compute_cardinality(answer);
+
+ if (answer->cardinality > DEFAULT_MAX_SIZE) {
+ return true;
+ } else {
+ array_container_t *newanswer = array_container_from_bitset(answer);
+ bitset_container_free(CAST_bitset(*dst)); // the temporary clone is no longer needed
+ if (newanswer == NULL) {
+ *dst = NULL;
+ return false;
+ }
+ *dst = newanswer;
+ return false;
+ }
+ }
+}
+
+/* Compute the size of the intersection between src_1 and src_2 without
+ * building the intersection. A full src_2 yields src_1->cardinality and a
+ * src_2 without runs yields 0. */
+int array_run_container_intersection_cardinality(const array_container_t *src_1,
+ const run_container_t *src_2) {
+ if (run_container_is_full(src_2)) {
+ return src_1->cardinality;
+ }
+ if (src_2->n_runs == 0) {
+ return 0;
+ }
+ int32_t rlepos = 0;
+ int32_t arraypos = 0;
+ rle16_t rle = src_2->runs[rlepos];
+ int32_t newcard = 0;
+ while (arraypos < src_1->cardinality) {
+ const uint16_t arrayval = src_1->array[arraypos];
+ while (rle.value + rle.length <
+ arrayval) { // this will frequently be false
+ ++rlepos;
+ if (rlepos == src_2->n_runs) {
+ return newcard; // we are done
+ }
+ rle = src_2->runs[rlepos];
+ }
+ if (rle.value > arrayval) { // value lies in the gap before the run: jump ahead in src_1
+ arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
+ rle.value);
+ } else { // value lies inside the run: count it
+ newcard++;
+ arraypos++;
+ }
+ }
+ return newcard;
+}
+
+/* Compute the size of the intersection between the run container src_1 and
+ * the bitset container src_2, without building the intersection.
+ **/
+int run_bitset_container_intersection_cardinality(
+    const run_container_t *src_1, const bitset_container_t *src_2) {
+    int total = 0;
+    int32_t k = 0;
+
+    // A full run container intersects with everything in src_2.
+    if (run_container_is_full(src_1)) {
+        return bitset_container_cardinality(src_2);
+    }
+
+    while (k < src_1->n_runs) {
+        const rle16_t run = src_1->runs[k];
+        total +=
+            bitset_lenrange_cardinality(src_2->words, run.value, run.length);
+        ++k;
+    }
+    return total;
+}
+
+
+/* Return true when the array src_1 and the run container src_2 share at
+ * least one value. */
+bool array_run_container_intersect(const array_container_t *src_1,
+                                   const run_container_t *src_2) {
+    int32_t run_idx = 0;
+    int32_t arr_idx = 0;
+    rle16_t cur;
+
+    if (run_container_is_full(src_2)) {
+        return !array_container_empty(src_1);
+    }
+    if (src_2->n_runs == 0) {
+        return false;
+    }
+
+    cur = src_2->runs[run_idx];
+    while (arr_idx < src_1->cardinality) {
+        const uint16_t needle = src_1->array[arr_idx];
+
+        // Skip the runs that end before the current array value.
+        while (cur.value + cur.length < needle) {
+            ++run_idx;
+            if (run_idx == src_2->n_runs) {
+                return false;  // we are done
+            }
+            cur = src_2->runs[run_idx];
+        }
+        if (cur.value <= needle) {
+            return true;  // the value lies inside the current run
+        }
+        // The value lies in the gap before the run: jump ahead in src_1.
+        arr_idx = advanceUntil(src_1->array, arr_idx, src_1->cardinality,
+                               cur.value);
+    }
+    return false;
+}
+
+/* Return true when the run container src_1 and the bitset container src_2
+ * share at least one value.
+ **/
+bool run_bitset_container_intersect(const run_container_t *src_1,
+                                    const bitset_container_t *src_2) {
+    int32_t k = 0;
+
+    if (run_container_is_full(src_1)) {
+        return !bitset_container_empty(src_2);
+    }
+
+    while (k < src_1->n_runs) {
+        const rle16_t run = src_1->runs[k];
+        if (!bitset_lenrange_empty(src_2->words, run.value, run.length)) {
+            return true;
+        }
+        ++k;
+    }
+    return false;
+}
+
+/*
+ * Intersect two bitset containers into a newly allocated container stored
+ * in *dst. The cardinality is computed first so that the right container
+ * type is allocated directly: a bitset_container_t when the function
+ * returns true, an array_container_t when it returns false. *dst is left
+ * NULL when the allocation fails.
+ */
+bool bitset_bitset_container_intersection(
+    const bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    const int common = bitset_container_and_justcard(src_1, src_2);
+
+    if (common <= DEFAULT_MAX_SIZE) {
+        *dst = array_container_create_given_capacity(common);
+        if (*dst != NULL) {
+            array_container_t *compact = CAST_array(*dst);
+            compact->cardinality = common;
+            bitset_extract_intersection_setbits_uint16(
+                src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                compact->array, 0);
+        }
+        return false;  // not a bitset
+    }
+
+    *dst = bitset_container_create();
+    if (*dst != NULL) {
+        bitset_container_t *dense = CAST_bitset(*dst);
+        bitset_container_and_nocard(src_1, src_2, dense);
+        dense->cardinality = common;
+    }
+    return true;  // it is a bitset
+}
+
+/* Intersect src_1 with src_2, reusing src_1 when the result stays a bitset.
+ *  - More than DEFAULT_MAX_SIZE common values: src_1 is overwritten with
+ *    the intersection, *dst is src_1 and the function returns true.
+ *  - Otherwise: a new array container is stored in *dst (NULL when the
+ *    allocation fails), src_1 is left unmodified and is not freed here,
+ *    and the function returns false. */
+bool bitset_bitset_container_intersection_inplace(
+    bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    const int common = bitset_container_and_justcard(src_1, src_2);
+
+    if (common <= DEFAULT_MAX_SIZE) {
+        *dst = array_container_create_given_capacity(common);
+        if (*dst != NULL) {
+            array_container_t *compact = CAST_array(*dst);
+            compact->cardinality = common;
+            bitset_extract_intersection_setbits_uint16(
+                src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                compact->array, 0);
+        }
+        return false;  // not a bitset
+    }
+
+    *dst = src_1;
+    bitset_container_and_nocard(src_1, src_2, src_1);
+    CAST_bitset(*dst)->cardinality = common;
+    return true;  // it is a bitset
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_intersection.c */
+/* begin file src/containers/mixed_negation.c */
+/*
+ * mixed_negation.c
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// TODO: make simplified and optimized negation code across
+// the full range.
+
+/* Negation across the entire range of the container.
+ * Compute the negation of src and write the result to *dst.
+ * The complement of a sufficiently sparse set will always be dense and
+ * hence a bitmap.
+ * We assume that dst is pre-allocated and a valid bitset container.
+ * There can be no in-place version.
+ */
+void array_container_negation(const array_container_t *src,
+                              bitset_container_t *dst) {
+    const uint64_t full_card = UINT64_C(1 << 16);
+
+    // Start from the full set, then knock out the values present in src.
+    bitset_container_set_all(dst);
+
+    if (src->cardinality != 0) {
+        dst->cardinality = (int32_t)bitset_clear_list(
+            dst->words, full_card, src->array, (uint64_t)src->cardinality);
+    }
+}
+
+/* Negation across the entire range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ * Implemented by calling bitset_container_negation_range with the
+ * bounds 0 and (1 << 16).
+ */
+bool bitset_container_negation(
+ const bitset_container_t *src, container_t **dst
+){
+ return bitset_container_negation_range(src, 0, (1 << 16), dst);
+}
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ * Implemented by calling bitset_container_negation_range_inplace with
+ * the bounds 0 and (1 << 16).
+ */
+bool bitset_container_negation_inplace(
+ bitset_container_t *src, container_t **dst
+){
+ return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
+}
+
+/* Negation across the entire range of container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined * in containers.h
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation(const run_container_t *src, container_t **dst) {
+    /* Full-span negation: forward to the range variant with [0, 1 << 16). */
+    const int whole_range_end = (1 << 16);
+    return run_container_negation_range(src, 0, whole_range_end, dst);
+}
+
+/*
+ * Same as run_container_negation except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_inplace(run_container_t *src, container_t **dst) {
+    /* Full-span in-place negation: forward to the in-place range variant. */
+    const int whole_range_end = (1 << 16);
+    return run_container_negation_range_inplace(src, 0, whole_range_end, dst);
+}
+
+/* Negation across a range of the container.
+ * Compute the negation of src and write the result
+ * to *dst. Returns true if the result is a bitset container
+ * and false for an array container. *dst is not preallocated.
+ */
+bool array_container_negation_range(
+ const array_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ /* close port of the Java implementation */
+ // Empty (or inverted) range: nothing is flipped, the result is a clone
+ // of src and therefore an array container (return false).
+ if (range_start >= range_end) {
+ *dst = array_container_clone(src);
+ return false;
+ }
+
+ // Locate range_start in src->array. A negative binarySearch result is
+ // remapped with -r - 1 (presumably the insertion point, i.e. the index
+ // of the first element >= range_start -- confirm against binarySearch).
+ int32_t start_index =
+ binarySearch(src->array, src->cardinality, (uint16_t)range_start);
+ if (start_index < 0) start_index = -start_index - 1;
+
+ // Locate the last value of the range (range_end - 1). A negative result
+ // is remapped with -r - 2 (presumably the index of the last element
+ // below range_end -- confirm against binarySearch).
+ int32_t last_index =
+ binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
+ if (last_index < 0) last_index = -last_index - 2;
+
+ // Bookkeeping: how many source values sit inside the range, how many
+ // values the flipped range will hold, and the resulting cardinality.
+ const int32_t current_values_in_range = last_index - start_index + 1;
+ const int32_t span_to_be_flipped = range_end - range_start;
+ const int32_t new_values_in_range =
+ span_to_be_flipped - current_values_in_range;
+ const int32_t cardinality_change =
+ new_values_in_range - current_values_in_range;
+ const int32_t new_cardinality = src->cardinality + cardinality_change;
+
+ // Result too large for an array: convert src to a bitset, flip the
+ // range there, and store the precomputed cardinality.
+ if (new_cardinality > DEFAULT_MAX_SIZE) {
+ bitset_container_t *temp = bitset_container_from_array(src);
+ bitset_flip_range(temp->words, (uint32_t)range_start,
+ (uint32_t)range_end);
+ temp->cardinality = new_cardinality;
+ *dst = temp;
+ return true;
+ }
+
+ // Result fits in an array container sized exactly for new_cardinality.
+ array_container_t *arr =
+ array_container_create_given_capacity(new_cardinality);
+ *dst = (container_t *)arr;
+ if(new_cardinality == 0) {
+ arr->cardinality = new_cardinality;
+ return false; // we are done.
+ }
+ // copy stuff before the active area
+ memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
+
+ // work on the range
+ // Walk every value of the range; emit it unless it matches the next
+ // source element inside the range, in which case that element is skipped.
+ int32_t out_pos = start_index, in_pos = start_index;
+ int32_t val_in_range = range_start;
+ for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
+ if ((uint16_t)val_in_range != src->array[in_pos]) {
+ arr->array[out_pos++] = (uint16_t)val_in_range;
+ } else {
+ ++in_pos;
+ }
+ }
+ // Source elements in the range are exhausted: every remaining value of
+ // the range is absent from src and is therefore emitted.
+ for (; val_in_range < range_end; ++val_in_range)
+ arr->array[out_pos++] = (uint16_t)val_in_range;
+
+ // content after the active range
+ memcpy(arr->array + out_pos, src->array + (last_index + 1),
+ (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
+ arr->cardinality = new_cardinality;
+ return false;
+}
+
+/* Even when the result would fit, it is unclear how to make an
+ * inplace version without inefficient copying.
+ */
+
+bool array_container_negation_range_inplace(
+    array_container_t *src,
+    const int range_start, const int range_end,
+    container_t **dst
+){
+    /* Not a genuine in-place algorithm: the result is built in a new
+     * container by the non-inplace routine, after which src is released. */
+    // TODO : try a real inplace version
+    const bool result_is_bitset =
+        array_container_negation_range(src, range_start, range_end, dst);
+    array_container_free(src);
+    return result_is_bitset;
+}
+
+/* Negation across a range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation_range(
+    const bitset_container_t *src,
+    const int range_start, const int range_end,
+    container_t **dst
+){
+    // TODO maybe consider density-based estimate
+    // and sometimes build result directly as array, with
+    // conversion back to bitset if wrong. Or determine
+    // actual result cardinality, then go directly for the known final cont.
+
+    /* Work on a private copy so src stays untouched: flip the requested
+     * range, then recount the set bits. */
+    bitset_container_t *flipped = bitset_container_clone(src);
+    bitset_flip_range(flipped->words, (uint32_t)range_start,
+                      (uint32_t)range_end);
+    flipped->cardinality = bitset_container_compute_cardinality(flipped);
+
+    /* Small results are handed back as an array container instead. */
+    if (flipped->cardinality <= DEFAULT_MAX_SIZE) {
+        *dst = array_container_from_bitset(flipped);
+        bitset_container_free(flipped);
+        return false;
+    }
+
+    *dst = flipped;
+    return true;
+}
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation_range except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_range_inplace(
+    bitset_container_t *src,
+    const int range_start, const int range_end,
+    container_t **dst
+){
+    /* Flip directly inside src and refresh its cardinality. */
+    bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
+    src->cardinality = bitset_container_compute_cardinality(src);
+
+    if (src->cardinality <= DEFAULT_MAX_SIZE) {
+        /* Result is small: hand back a new array container and release
+         * the bitset that was passed in. */
+        *dst = array_container_from_bitset(src);
+        bitset_container_free(src);
+        return false;
+    }
+
+    /* Result stays a bitset: src itself is the answer. */
+    *dst = src;
+    return true;
+}
+
+/* Negation across a range of container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined in containers.h
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+int run_container_negation_range(
+    const run_container_t *src,
+    const int range_start, const int range_end,
+    container_t **dst
+){
+    uint8_t result_type;
+
+    // follows the Java implementation
+    /* Empty range: the answer is simply a copy of src. */
+    if (range_end <= range_start) {
+        *dst = run_container_clone(src);
+        return RUN_CONTAINER_TYPE;
+    }
+
+    run_container_t *ans =
+        run_container_create_given_capacity(src->n_runs + 1);
+
+    /* Runs that start before the range are copied over verbatim. */
+    int k = 0;
+    while (k < src->n_runs && src->runs[k].value < range_start) {
+        ans->runs[k] = src->runs[k];
+        ans->n_runs++;
+        ++k;
+    }
+
+    /* Append the range itself in "exclusive" mode, then every remaining
+     * source run, also in exclusive mode. */
+    run_container_smart_append_exclusive(
+        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
+
+    while (k < src->n_runs) {
+        run_container_smart_append_exclusive(ans, src->runs[k].value,
+                                             src->runs[k].length);
+        ++k;
+    }
+
+    /* Let the converter choose the final container type; if it is not a
+     * run container, the temporary run container is released. */
+    *dst = convert_run_to_efficient_container(ans, &result_type);
+    if (result_type != RUN_CONTAINER_TYPE) {
+        run_container_free(ans);
+    }
+
+    return result_type;
+}
+
+/*
+ * Same as run_container_negation_range except that if the output is to
+ * be a
+ * run_container_t, and has the capacity to hold the result,
+ * then src is modified and no allocation is made.
+ * In all cases, the result is in *dst.
+ */
+int run_container_negation_range_inplace(
+ run_container_t *src,
+ const int range_start, const int range_end,
+ container_t **dst
+){
+ uint8_t return_typecode;
+
+ // Empty range: src is returned unchanged as the result.
+ if (range_end <= range_start) {
+ *dst = src;
+ return RUN_CONTAINER_TYPE;
+ }
+
+ // TODO: efficient special case when range is 0 to 65535 inclusive
+
+ // When src has no spare run slot, probe membership at both borders of
+ // the range to decide whether the in-place rewrite is attempted at all.
+ if (src->capacity == src->n_runs) {
+ // no excess room. More checking to see if result can fit
+ bool last_val_before_range = false;
+ bool first_val_in_range = false;
+ bool last_val_in_range = false;
+ bool first_val_past_range = false;
+
+ // Membership just before the range (stays false when range_start == 0)
+ // and at its first value.
+ if (range_start > 0)
+ last_val_before_range =
+ run_container_contains(src, (uint16_t)(range_start - 1));
+ first_val_in_range = run_container_contains(src, (uint16_t)range_start);
+
+ if (last_val_before_range == first_val_in_range) {
+ // Same membership on both sides of the left border; now test
+ // the right border (stays false past 0xFFFF).
+ last_val_in_range =
+ run_container_contains(src, (uint16_t)(range_end - 1));
+ if (range_end != 0x10000)
+ first_val_past_range =
+ run_container_contains(src, (uint16_t)range_end);
+
+ if (last_val_in_range ==
+ first_val_past_range) { // no space for inplace
+ // Fall back to the allocating version, then release src.
+ int ans = run_container_negation_range(src, range_start,
+ range_end, dst);
+ run_container_free(src);
+ return ans;
+ }
+ }
+ }
+ // all other cases: result will fit
+
+ // ans aliases src: the output is written into the same runs array that
+ // is being read, so the original run count is saved before resetting.
+ run_container_t *ans = src;
+ int my_nbr_runs = src->n_runs;
+
+ ans->n_runs = 0;
+ int k = 0;
+ // Runs starting before the range are already in place; just count them.
+ for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
+ // ans->runs[k] = src->runs[k]; (would be self-copy)
+ ans->n_runs++;
+ }
+
+ // as with Java implementation, use locals to give self a buffer of depth 1
+ // 'buffered' holds the next input run, read before appends can write to
+ // the shared array; 'next' holds the run after it.
+ rle16_t buffered = MAKE_RLE16(0, 0);
+ rle16_t next = buffered;
+ if (k < my_nbr_runs) buffered = src->runs[k];
+
+ // Append the range itself in exclusive mode.
+ run_container_smart_append_exclusive(
+ ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
+
+ // Append each remaining input run in exclusive mode, always reading run
+ // k + 1 into 'next' before appending run k.
+ for (; k < my_nbr_runs; ++k) {
+ if (k + 1 < my_nbr_runs) next = src->runs[k + 1];
+
+ run_container_smart_append_exclusive(ans, buffered.value,
+ buffered.length);
+ buffered = next;
+ }
+
+ // Convert to the preferred container type; if that is not a run
+ // container, the run container (which is src) is freed.
+ *dst = convert_run_to_efficient_container(ans, &return_typecode);
+ if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
+
+ return return_typecode;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_negation.c */
+/* begin file src/containers/mixed_subset.c */
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+bool array_container_is_subset_bitset(const array_container_t* container1,
+                                      const bitset_container_t* container2) {
+    /* Quick rejection: when the bitset's cardinality is known and smaller
+     * than the array's, the array cannot be a subset. */
+    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY &&
+        container2->cardinality < container1->cardinality) {
+        return false;
+    }
+
+    /* Otherwise every array value has to be present in the bitset. */
+    int pos = 0;
+    while (pos < container1->cardinality) {
+        if (!bitset_container_contains(container2, container1->array[pos])) {
+            return false;
+        }
+        ++pos;
+    }
+    return true;
+}
+
+bool run_container_is_subset_array(const run_container_t* container1,
+                                   const array_container_t* container2) {
+    /* A run container with more values than the array cannot be a subset. */
+    if (run_container_cardinality(container1) > container2->cardinality) {
+        return false;
+    }
+
+    int32_t lo_idx = -1;
+    int32_t hi_idx = -1;
+    int r;
+    for (r = 0; r < container1->n_runs; ++r) {
+        const int32_t first = container1->runs[r].value;
+        const int32_t last = first + container1->runs[r].length;
+
+        /* Both searches resume from the position reached for the previous
+         * run's last value. */
+        lo_idx = advanceUntil(container2->array, hi_idx,
+                              container2->cardinality, first);
+        hi_idx = advanceUntil(container2->array, hi_idx,
+                              container2->cardinality, last);
+
+        /* The run is covered only when both endpoints are found and the
+         * distance between their positions equals the run length. */
+        if (hi_idx == container2->cardinality ||
+            hi_idx - lo_idx != last - first ||
+            container2->array[lo_idx] != first ||
+            container2->array[hi_idx] != last) {
+            return false;
+        }
+    }
+    return true;
+}
+
+bool array_container_is_subset_run(const array_container_t* container1,
+                                   const run_container_t* container2) {
+    /* More array values than run values: cannot be a subset. */
+    if (container1->cardinality > run_container_cardinality(container2)) {
+        return false;
+    }
+
+    int a = 0;
+    int r = 0;
+    while (a < container1->cardinality && r < container2->n_runs) {
+        const uint32_t run_first = container2->runs[r].value;
+        const uint32_t run_last = run_first + container2->runs[r].length;
+
+        if (container1->array[a] < run_first) {
+            /* Value falls in a gap before the current run. */
+            return false;
+        }
+        if (container1->array[a] > run_last) {
+            /* Value lies beyond this run; try the next one. */
+            r++;
+        } else {
+            /* Value is inside the run; move on to the next value. */
+            a++;
+        }
+    }
+
+    /* Subset exactly when every array value was matched to some run. */
+    return a == container1->cardinality;
+}
+
+bool run_container_is_subset_bitset(const run_container_t* container1,
+                                    const bitset_container_t* container2) {
+    // todo: this code could be much faster
+    /* Use the stored bitset cardinality when available; otherwise count it
+     * (container2 itself is not modified). */
+    int32_t bitset_card = container2->cardinality;
+    if (bitset_card == BITSET_UNKNOWN_CARDINALITY) {
+        bitset_card = bitset_container_compute_cardinality(container2);
+    }
+    if (bitset_card < run_container_cardinality(container1)) {
+        return false;
+    }
+
+    /* Check every value of every run against the bitset. */
+    int r;
+    for (r = 0; r < container1->n_runs; ++r) {
+        const uint32_t run_first = container1->runs[r].value;
+        const uint32_t run_last = run_first + container1->runs[r].length;
+        uint32_t v;
+        for (v = run_first; v <= run_last; ++v) {
+            if (!bitset_container_contains(container2, v)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+bool bitset_container_is_subset_run(const bitset_container_t* container1,
+ const run_container_t* container2) {
+ // todo: this code could be much faster
+ // Quick rejection when the bitset's cardinality is known and exceeds the
+ // number of values held by the run container.
+ if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+ if (container1->cardinality > run_container_cardinality(container2)) {
+ return false;
+ }
+ }
+ // Walk the bitset word by word and the runs in order, in lockstep.
+ int32_t i_bitset = 0, i_run = 0;
+ while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS &&
+ i_run < container2->n_runs) {
+ // w is a working copy of the current word; matched bits are cleared.
+ uint64_t w = container1->words[i_bitset];
+ while (w != 0 && i_run < container2->n_runs) {
+ uint32_t start = container2->runs[i_run].value;
+ uint32_t stop = start + container2->runs[i_run].length;
+ // t isolates the lowest set bit of w; r is the value that bit
+ // represents (word index * 64 + bit position).
+ uint64_t t = w & (~w + 1);
+ uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
+ if (r < start) {
+ // Value lies before the current run, so it is not covered.
+ return false;
+ } else if (r > stop) {
+ // Value lies after the current run: advance to the next run
+ // and re-test the same bit.
+ i_run++;
+ continue;
+ } else {
+ // Value is inside the run: clear the bit and keep going.
+ w ^= t;
+ }
+ }
+ if (w == 0) {
+ i_bitset++;
+ } else {
+ // Runs were exhausted while this word still had unmatched bits.
+ return false;
+ }
+ }
+ if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ // terminated iterating on the run containers, check that rest of bitset
+ // is empty
+ for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) {
+ if (container1->words[i_bitset] != 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_subset.c */
+/* begin file src/containers/mixed_union.c */
+/*
+ * mixed_union.c
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. */
+void array_bitset_container_union(const array_container_t *src_1,
+                                  const bitset_container_t *src_2,
+                                  bitset_container_t *dst) {
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+    /* Add the array's values; the value returned by the helper is stored
+     * as the new cardinality. */
+    dst->cardinality = (int32_t)bitset_set_list_withcard(
+        dst->words, dst->cardinality, src_1->array, src_1->cardinality);
+}
+
+/* Compute the union of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
+void array_bitset_container_lazy_union(const array_container_t *src_1,
+                                       const bitset_container_t *src_2,
+                                       bitset_container_t *dst) {
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+    /* Lazy variant: the cardinality is not tracked and is marked unknown. */
+    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+    bitset_set_list(dst->words, src_1->array, src_1->cardinality);
+}
+
+void run_bitset_container_union(const run_container_t *src_1,
+                                const bitset_container_t *src_2,
+                                bitset_container_t *dst) {
+    assert(!run_container_is_full(src_1));  // catch this case upstream
+
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+
+    /* Set the bits covered by each run, then recount. */
+    int32_t pos = 0;
+    while (pos < src_1->n_runs) {
+        const rle16_t run = src_1->runs[pos];
+        bitset_set_lenrange(dst->words, run.value, run.length);
+        ++pos;
+    }
+    dst->cardinality = bitset_container_compute_cardinality(dst);
+}
+
+void run_bitset_container_lazy_union(const run_container_t *src_1,
+                                     const bitset_container_t *src_2,
+                                     bitset_container_t *dst) {
+    assert(!run_container_is_full(src_1));  // catch this case upstream
+
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+
+    /* Set the bits covered by each run. */
+    int32_t pos = 0;
+    while (pos < src_1->n_runs) {
+        const rle16_t run = src_1->runs[pos];
+        bitset_set_lenrange(dst->words, run.value, run.length);
+        ++pos;
+    }
+
+    /* Lazy variant: skip the recount and mark the cardinality unknown. */
+    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+}
+
+// why do we leave the result as a run container??
+/* Compute the union of an array container (src_1) and a run container
+ * (src_2) and write it to dst, which stays a run container.
+ *
+ * The merge is seeded with the first element of one operand, so both
+ * operands' first elements used to be read unconditionally. An empty src_1
+ * or a src_2 with no runs is now handled without touching array[0] or
+ * runs[0].
+ */
+void array_run_container_union(const array_container_t *src_1,
+                               const run_container_t *src_2,
+                               run_container_t *dst) {
+    /* If src_2 is already full, or src_1 contributes nothing, the union is
+     * exactly src_2. */
+    if (run_container_is_full(src_2) || src_1->cardinality == 0) {
+        run_container_copy(src_2, dst);
+        return;
+    }
+    // TODO: see whether the "2*" is spurious
+    run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
+    int32_t rlepos = 0;
+    int32_t arraypos = 0;
+    rle16_t previousrle;
+
+    /* Seed the output with whichever operand starts first. src_1 is known
+     * to be non-empty here; src_2 is only consulted if it has a run. */
+    if ((src_2->n_runs > 0) &&
+        (src_2->runs[rlepos].value <= src_1->array[arraypos])) {
+        previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
+        rlepos++;
+    } else {
+        previousrle =
+            run_container_append_value_first(dst, src_1->array[arraypos]);
+        arraypos++;
+    }
+
+    /* Merge step: always append the operand element that starts first. */
+    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
+        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
+            run_container_append(dst, src_2->runs[rlepos], &previousrle);
+            rlepos++;
+        } else {
+            run_container_append_value(dst, src_1->array[arraypos],
+                                       &previousrle);
+            arraypos++;
+        }
+    }
+
+    /* Drain whichever operand still has elements left. */
+    if (arraypos < src_1->cardinality) {
+        while (arraypos < src_1->cardinality) {
+            run_container_append_value(dst, src_1->array[arraypos],
+                                       &previousrle);
+            arraypos++;
+        }
+    } else {
+        while (rlepos < src_2->n_runs) {
+            run_container_append(dst, src_2->runs[rlepos], &previousrle);
+            rlepos++;
+        }
+    }
+}
+
+/* Compute the union of an array container (src_1) and a run container
+ * (src_2), storing the result in src_2.
+ *
+ * The existing runs are first moved to the tail of src_2's (possibly grown)
+ * buffer so the merged output can be written from the front.
+ *
+ * The merge is seeded with the first element of one operand, so both
+ * operands' first elements used to be read unconditionally. An empty src_1
+ * or a src_2 with no runs is now handled without touching array[0] or the
+ * relocated runs[0].
+ */
+void array_run_container_inplace_union(const array_container_t *src_1,
+                                       run_container_t *src_2) {
+    /* A full src_2 already is the union; an empty src_1 adds nothing. */
+    if (run_container_is_full(src_2) || src_1->cardinality == 0) {
+        return;
+    }
+    const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
+    const int32_t neededcapacity = maxoutput + src_2->n_runs;
+    if (src_2->capacity < neededcapacity)
+        run_container_grow(src_2, neededcapacity, true);
+
+    /* Relocate the input runs past the region the output may occupy. */
+    memmove(src_2->runs + maxoutput, src_2->runs,
+            src_2->n_runs * sizeof(rle16_t));
+    rle16_t *inputsrc2 = src_2->runs + maxoutput;
+    int32_t rlepos = 0;
+    int32_t arraypos = 0;
+    int src2nruns = src_2->n_runs;
+    src_2->n_runs = 0;
+
+    rle16_t previousrle;
+
+    /* Seed the output with whichever operand starts first. src_1 is known
+     * to be non-empty here; the relocated runs are only consulted if there
+     * is at least one. */
+    if ((src2nruns > 0) &&
+        (inputsrc2[rlepos].value <= src_1->array[arraypos])) {
+        previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
+        rlepos++;
+    } else {
+        previousrle =
+            run_container_append_value_first(src_2, src_1->array[arraypos]);
+        arraypos++;
+    }
+
+    /* Merge step: always append the operand element that starts first. */
+    while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
+        if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
+            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
+            rlepos++;
+        } else {
+            run_container_append_value(src_2, src_1->array[arraypos],
+                                       &previousrle);
+            arraypos++;
+        }
+    }
+
+    /* Drain whichever operand still has elements left. */
+    if (arraypos < src_1->cardinality) {
+        while (arraypos < src_1->cardinality) {
+            run_container_append_value(src_2, src_1->array[arraypos],
+                                       &previousrle);
+            arraypos++;
+        }
+    } else {
+        while (rlepos < src2nruns) {
+            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
+            rlepos++;
+        }
+    }
+}
+
+bool array_array_container_union(
+    const array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    /* Upper bound on the size of the union. */
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+
+    if (sum_card > DEFAULT_MAX_SIZE) {
+        /* Possibly too big for an array: build the union in a bitset. */
+        *dst = bitset_container_create();
+        if (*dst == NULL) {
+            return true;  // expect a bitset
+        }
+        bitset_container_t *bits = CAST_bitset(*dst);
+        bitset_set_list(bits->words, src_1->array, src_1->cardinality);
+        bits->cardinality = (int32_t)bitset_set_list_withcard(
+            bits->words, src_1->cardinality, src_2->array,
+            src_2->cardinality);
+        if (bits->cardinality > DEFAULT_MAX_SIZE) {
+            return true;  // stays a bitset
+        }
+        /* The real cardinality is small after all: convert to an array. */
+        *dst = array_container_from_bitset(bits);
+        bitset_container_free(bits);
+        return false;  // not going to be a bitset
+    }
+
+    /* Small enough to be an array for certain. */
+    *dst = array_container_create_given_capacity(sum_card);
+    if (*dst == NULL) {
+        return true;  // otherwise failure won't be caught
+    }
+    array_container_union(src_1, src_2, CAST_array(*dst));
+    return false;  // not a bitset
+}
+
+bool array_array_container_inplace_union(
+    array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    /* Upper bound on the size of the union. */
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+    *dst = NULL;
+
+    if (sum_card <= DEFAULT_MAX_SIZE) {
+        if (src_1->capacity >= sum_card) {
+            /* src_1 has room: move its values up by src_2->cardinality
+             * slots, then merge back into the front of the same buffer.
+             * *dst stays NULL in this case. */
+            uint16_t *shifted = src_1->array + src_2->cardinality;
+            memmove(shifted, src_1->array,
+                    src_1->cardinality * sizeof(uint16_t));
+            src_1->cardinality = (int32_t)union_uint16(
+                shifted, src_1->cardinality, src_2->array,
+                src_2->cardinality, src_1->array);
+            return false;  // not a bitset
+        }
+        /* Not enough room: allocate a new array container. */
+        *dst = array_container_create_given_capacity(
+            2 * sum_card);  // be purposefully generous
+        if (*dst == NULL) {
+            return true;  // otherwise failure won't be caught
+        }
+        array_container_union(src_1, src_2, CAST_array(*dst));
+        return false;  // not a bitset
+    }
+
+    /* Possibly too big for an array: build the union in a bitset. */
+    *dst = bitset_container_create();
+    if (*dst == NULL) {
+        return true;  // expect a bitset
+    }
+    bitset_container_t *bits = CAST_bitset(*dst);
+    bitset_set_list(bits->words, src_1->array, src_1->cardinality);
+    bits->cardinality = (int32_t)bitset_set_list_withcard(
+        bits->words, src_1->cardinality, src_2->array, src_2->cardinality);
+    if (bits->cardinality > DEFAULT_MAX_SIZE) {
+        return true;  // stays a bitset
+    }
+
+    /* The real cardinality is small after all: extract the set bits back
+     * into src_1 (grown if needed) and report src_1 as the result. */
+    if (src_1->capacity < bits->cardinality) {
+        array_container_grow(src_1, bits->cardinality, false);
+    }
+    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                                  src_1->array, 0);
+    src_1->cardinality = bits->cardinality;
+    *dst = src_1;
+    bitset_container_free(bits);
+    return false;  // not going to be a bitset
+}
+
+
+bool array_array_container_lazy_union(
+    const array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    /* Upper bound on the size of the union. */
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+
+    if (sum_card > ARRAY_LAZY_LOWERBOUND) {
+        /* Above the lazy threshold: produce a bitset whose cardinality is
+         * left unknown. */
+        *dst = bitset_container_create();
+        if (*dst != NULL) {
+            bitset_container_t *bits = CAST_bitset(*dst);
+            bitset_set_list(bits->words, src_1->array, src_1->cardinality);
+            bitset_set_list(bits->words, src_2->array, src_2->cardinality);
+            bits->cardinality = BITSET_UNKNOWN_CARDINALITY;
+        }
+        return true;  // expect a bitset
+    }
+
+    /* At or below the threshold: plain array union. */
+    *dst = array_container_create_given_capacity(sum_card);
+    if (*dst == NULL) {
+        return true;  // otherwise failure won't be caught
+    }
+    array_container_union(src_1, src_2, CAST_array(*dst));
+    return false;  // not a bitset
+}
+
+
+bool array_array_container_lazy_inplace_union(
+    array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    /* Upper bound on the size of the union. */
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+    *dst = NULL;
+
+    if (sum_card > ARRAY_LAZY_LOWERBOUND) {
+        /* Above the lazy threshold: produce a bitset whose cardinality is
+         * left unknown. */
+        *dst = bitset_container_create();
+        if (*dst != NULL) {
+            bitset_container_t *bits = CAST_bitset(*dst);
+            bitset_set_list(bits->words, src_1->array, src_1->cardinality);
+            bitset_set_list(bits->words, src_2->array, src_2->cardinality);
+            bits->cardinality = BITSET_UNKNOWN_CARDINALITY;
+        }
+        return true;  // expect a bitset
+    }
+
+    if (src_1->capacity >= sum_card) {
+        /* src_1 has room: move its values up by src_2->cardinality slots,
+         * then merge back into the front of the same buffer. *dst stays
+         * NULL in this case. */
+        uint16_t *shifted = src_1->array + src_2->cardinality;
+        memmove(shifted, src_1->array,
+                src_1->cardinality * sizeof(uint16_t));
+        src_1->cardinality = (int32_t)union_uint16(
+            shifted, src_1->cardinality, src_2->array, src_2->cardinality,
+            src_1->array);
+        return false;  // not a bitset
+    }
+
+    /* Not enough room in src_1: allocate a new array container. */
+    *dst = array_container_create_given_capacity(
+        2 * sum_card);  // be purposefully generous
+    if (*dst == NULL) {
+        return true;  // otherwise failure won't be caught
+    }
+    array_container_union(src_1, src_2, CAST_array(*dst));
+    return false;  // not a bitset
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_union.c */
+/* begin file src/containers/mixed_xor.c */
+/*
+ * mixed_xor.c
+ */
+
+#include <assert.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst (which has no container initially).
+ * Result is true iff dst is a bitset */
+bool array_bitset_container_xor(
+    const array_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    /* Work on a fresh copy of the bitset operand and flip the bit of every
+     * array value; the helper's return value becomes the cardinality. */
+    bitset_container_t *flipped = bitset_container_create();
+    bitset_container_copy(src_2, flipped);
+    flipped->cardinality = (int32_t)bitset_flip_list_withcard(
+        flipped->words, flipped->cardinality, src_1->array,
+        src_1->cardinality);
+
+    // do required type conversions.
+    if (flipped->cardinality > DEFAULT_MAX_SIZE) {
+        *dst = flipped;
+        return true;  // bitset
+    }
+    *dst = array_container_from_bitset(flipped);
+    bitset_container_free(flipped);
+    return false;  // not bitset
+}
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst. It is allowed for src_2 to be dst. This version does not
+ * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
+ */
+
+void array_bitset_container_lazy_xor(const array_container_t *src_1,
+                                     const bitset_container_t *src_2,
+                                     bitset_container_t *dst) {
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+    /* Lazy variant: the cardinality is not tracked and is marked unknown. */
+    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+    bitset_flip_list(dst->words, src_1->array, src_1->cardinality);
+}
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst. Result may be either a bitset or an array container
+ * (returns "result is bitset"). dst does not initially have
+ * any container, but becomes either a bitset container (return
+ * result true) or an array container.
+ */
+
+bool run_bitset_container_xor(
+    const run_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    /* Copy the bitset operand into a fresh container. */
+    bitset_container_t *flipped = bitset_container_create();
+    bitset_container_copy(src_2, flipped);
+
+    /* Flip the half-open range [value, value + length + 1) of every run. */
+    int32_t pos = 0;
+    while (pos < src_1->n_runs) {
+        const rle16_t run = src_1->runs[pos];
+        bitset_flip_range(flipped->words, run.value,
+                          run.value + run.length + UINT32_C(1));
+        ++pos;
+    }
+    flipped->cardinality = bitset_container_compute_cardinality(flipped);
+
+    /* Keep the bitset only when the result is large enough. */
+    if (flipped->cardinality > DEFAULT_MAX_SIZE) {
+        *dst = flipped;
+        return true;  // bitset
+    }
+    *dst = array_container_from_bitset(flipped);
+    bitset_container_free(flipped);
+    return false;  // not bitset
+}
+
+/* lazy xor. Dst is initialized and may be equal to src_2.
+ * Result is left as a bitset container, even if actual
+ * cardinality would dictate an array container.
+ */
+
+void run_bitset_container_lazy_xor(const run_container_t *src_1,
+                                   const bitset_container_t *src_2,
+                                   bitset_container_t *dst) {
+    /* Seed dst with the bitset operand unless dst already is that operand. */
+    if (dst != src_2) {
+        bitset_container_copy(src_2, dst);
+    }
+
+    /* Flip the half-open range [value, value + length + 1) of every run. */
+    int32_t pos = 0;
+    while (pos < src_1->n_runs) {
+        const rle16_t run = src_1->runs[pos];
+        bitset_flip_range(dst->words, run.value,
+                          run.value + run.length + UINT32_C(1));
+        ++pos;
+    }
+
+    /* Lazy variant: skip the recount and mark the cardinality unknown. */
+    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
+}
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int array_run_container_xor(
+    const array_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    // semi following Java XOR implementation as of May 2016
+    // the C OR implementation works quite differently and can return a run
+    // container
+    // TODO could optimize for full run containers.
+
+    // use of lazy following Java impl.
+    const int arbitrary_threshold = 32;
+    if (src_1->cardinality < arbitrary_threshold) {
+        /* Few array values: xor run-wise, then let the converter pick the
+         * final container type (it also disposes of the temporary). */
+        uint8_t typecode_after;
+        run_container_t *runs_out = run_container_create();
+        array_run_container_lazy_xor(src_1, src_2, runs_out);  // keeps runs.
+        *dst = convert_run_to_efficient_container_and_free(runs_out,
+                                                           &typecode_after);
+        return typecode_after;
+    }
+
+    const int run_card = run_container_cardinality(src_2);
+    if (run_card > DEFAULT_MAX_SIZE) {
+        /* guess that it will end up as a bitset */
+        bitset_container_t *as_bitset = bitset_container_from_run(src_2);
+        // any necessary type conversion has been done by the ixor
+        if (bitset_array_container_ixor(as_bitset, src_1, dst)) {
+            return BITSET_CONTAINER_TYPE;
+        }
+        return ARRAY_CONTAINER_TYPE;
+    }
+
+    // Java implementation works with the array, xoring the run elements via
+    // iterator
+    array_container_t *as_array = array_container_from_run(src_2);
+    const bool result_is_bitset =
+        array_array_container_xor(as_array, src_1, dst);
+    array_container_free(as_array);
+    if (result_is_bitset) {
+        return BITSET_CONTAINER_TYPE;
+    }
+    return ARRAY_CONTAINER_TYPE;
+}
+
+/* Dst is a valid run container. (Can it be src_2? Let's say not.)
+ * Leaves result as run container, even if other options are
+ * smaller.
+ */
+
+void array_run_container_lazy_xor(const array_container_t *src_1,
+                                  const run_container_t *src_2,
+                                  run_container_t *dst) {
+    /* Reserve one slot per input element (array values plus runs). */
+    run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
+    dst->n_runs = 0;
+
+    int32_t run_idx = 0;
+    int32_t arr_idx = 0;
+
+    /* Merge both inputs in order of starting value; each array value is
+     * appended as a run of length 0. Ties go to the run. */
+    while ((run_idx < src_2->n_runs) && (arr_idx < src_1->cardinality)) {
+        if (src_2->runs[run_idx].value > src_1->array[arr_idx]) {
+            run_container_smart_append_exclusive(dst, src_1->array[arr_idx],
+                                                 0);
+            arr_idx++;
+        } else {
+            run_container_smart_append_exclusive(dst,
+                                                 src_2->runs[run_idx].value,
+                                                 src_2->runs[run_idx].length);
+            run_idx++;
+        }
+    }
+
+    /* Drain whatever remains of the array, then of the runs (at most one of
+     * these loops does any work). */
+    for (; arr_idx < src_1->cardinality; arr_idx++) {
+        run_container_smart_append_exclusive(dst, src_1->array[arr_idx], 0);
+    }
+    for (; run_idx < src_2->n_runs; run_idx++) {
+        run_container_smart_append_exclusive(dst, src_2->runs[run_idx].value,
+                                             src_2->runs[run_idx].length);
+    }
+}
+
+/* dst does not indicate a valid container initially. Eventually it
+ * can become any kind of container.
+ */
+
+int run_run_container_xor(
+    const run_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    uint8_t typecode_after;
+
+    /* Xor into a temporary run container, then let the converter pick the
+     * final container type (it also disposes of the temporary). */
+    run_container_t *runs_out = run_container_create();
+    run_container_xor(src_1, src_2, runs_out);
+    *dst = convert_run_to_efficient_container_and_free(runs_out,
+                                                       &typecode_after);
+    return typecode_after;
+}
+
+/*
+ * Java implementation (as of May 2016) for array_run, run_run
+ * and bitset_run don't do anything different for inplace.
+ * Could adopt the mixed_union.c approach instead (ie, using
+ * smart_append_exclusive)
+ *
+ */
+
+bool array_array_container_xor(
+    const array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    /* Upper bound on the size of the symmetric difference. */
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+
+    if (sum_card <= DEFAULT_MAX_SIZE) {
+        /* Guaranteed to fit in an array container. */
+        *dst = array_container_create_given_capacity(sum_card);
+        array_container_xor(src_1, src_2, CAST_array(*dst));
+        return false;  // not a bitset
+    }
+
+    /* Possibly large: load src_1 into a bitset and flip src_2's values. */
+    *dst = bitset_container_from_array(src_1);
+    bitset_container_t *bits = CAST_bitset(*dst);
+    bits->cardinality = (uint32_t)bitset_flip_list_withcard(
+        bits->words, src_1->cardinality, src_2->array, src_2->cardinality);
+
+    if (bits->cardinality > DEFAULT_MAX_SIZE) {
+        return true;  // expect a bitset
+    }
+
+    /* The real cardinality is small after all: convert to an array. */
+    *dst = array_container_from_bitset(bits);
+    bitset_container_free(bits);
+    return false;  // not going to be a bitset
+}
+
+bool array_array_container_lazy_xor(
+    const array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    // upper bound, but probably poor estimate for xor
+    const int sum_card = src_1->cardinality + src_2->cardinality;
+
+    if (sum_card > ARRAY_LAZY_LOWERBOUND) {
+        /* Above the lazy threshold: load src_1 into a bitset, flip src_2's
+         * values and leave the cardinality unknown. */
+        *dst = bitset_container_from_array(src_1);
+        if (*dst != NULL) {
+            bitset_container_t *bits = CAST_bitset(*dst);
+            bitset_flip_list(bits->words, src_2->array, src_2->cardinality);
+            bits->cardinality = BITSET_UNKNOWN_CARDINALITY;
+        }
+        return true;  // expect a bitset (maybe, for XOR??)
+    }
+
+    /* At or below the threshold: plain array xor. */
+    *dst = array_container_create_given_capacity(sum_card);
+    if (*dst != NULL) {
+        array_container_xor(src_1, src_2, CAST_array(*dst));
+    }
+    return false;  // not a bitset
+}
+
+/* Compute the xor of src_1 and src_2 and write the result to
+ * dst (which has no container initially). Return value is
+ * "dst is a bitset"
+ */
+
+bool bitset_bitset_container_xor(
+    const bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    /* Xor into a fresh bitset; the helper reports the resulting count. */
+    bitset_container_t *xored = bitset_container_create();
+    const int xored_card = bitset_container_xor(src_1, src_2, xored);
+
+    if (xored_card > DEFAULT_MAX_SIZE) {
+        *dst = xored;
+        return true;
+    }
+
+    /* Small result: convert to an array and drop the temporary bitset. */
+    *dst = array_container_from_bitset(xored);
+    bitset_container_free(xored);
+    return false;  // not bitset
+}
+
+/* In-place xor of the array container src_2 into the bitset container src_1.
+ * Every value listed in src_2 is flipped in src_1's words. If the result holds
+ * more than DEFAULT_MAX_SIZE values, src_1 itself is handed back through *dst
+ * and true is returned. Otherwise the result is converted into a new array
+ * container stored in *dst, src_1 is freed, and false is returned. In both
+ * cases the caller is responsible for deallocating *dst. */
+
+bool bitset_array_container_ixor(
+    bitset_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
+        src_1->words, src_1->cardinality, src_2->array, src_2->cardinality);
+
+    if (src_1->cardinality > DEFAULT_MAX_SIZE) {
+        *dst = src_1;
+        return true;  // still a bitset
+    }
+
+    *dst = array_container_from_bitset(src_1);
+    bitset_container_free(src_1);
+    return false;  // converted to an array
+}
+
+/* a bunch of in-place, some of which may not *really* be inplace.
+ * TODO: write actual inplace routine if efficiency warrants it
+ * Anything inplace with a bitset is a good candidate
+ */
+
+/* In-place xor of two bitset containers: src_1 receives src_1 ^ src_2.
+ * Returns true and sets *dst to src_1 when the result stays a bitset;
+ * otherwise stores a new array container in *dst, frees src_1 and returns
+ * false. */
+bool bitset_bitset_container_ixor(
+    bitset_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    // src_1 is used both as an input and as the output of the xor
+    const int cardinality = bitset_container_xor(src_1, src_2, src_1);
+
+    if (cardinality > DEFAULT_MAX_SIZE) {
+        *dst = src_1;
+        return true;
+    }
+
+    *dst = array_container_from_bitset(src_1);
+    bitset_container_free(src_1);
+    return false;  // not a bitset
+}
+
+/* "In-place" xor of an array container with a bitset container. The result is
+ * built by array_bitset_container_xor() into *dst, after which src_1 is
+ * released. Returns that helper's "result is a bitset" flag. */
+bool array_bitset_container_ixor(
+    array_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    const bool result_is_bitset = array_bitset_container_xor(src_1, src_2, dst);
+    array_container_free(src_1);  // src_1 is consumed by this operation
+    return result_is_bitset;
+}
+
+/* Xor the run container src_1 with the bitset container src_2. The result is
+ * produced by run_bitset_container_xor() into *dst (which holds no container
+ * on entry) and src_1 is freed afterwards. Returns true when *dst is a bitset
+ * container, false when it is an array container.
+ */
+
+bool run_bitset_container_ixor(
+    run_container_t *src_1, const bitset_container_t *src_2,
+    container_t **dst
+){
+    const bool result_is_bitset = run_bitset_container_xor(src_1, src_2, dst);
+    run_container_free(src_1);  // src_1 is consumed by this operation
+    return result_is_bitset;
+}
+
+/* Xor the bitset container src_1 with the run container src_2. Delegates to
+ * run_bitset_container_xor() with the operands swapped, then frees src_1.
+ * Returns that helper's "result is a bitset" flag. */
+bool bitset_run_container_ixor(
+    bitset_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    const bool result_is_bitset = run_bitset_container_xor(src_2, src_1, dst);
+    bitset_container_free(src_1);  // src_1 is consumed by this operation
+    return result_is_bitset;
+}
+
+/* Xor the array container src_1 with the run container src_2. *dst holds no
+ * valid container on entry and may end up as any kind of container. The
+ * result is built by array_run_container_xor(), src_1 is freed, and that
+ * helper's return value is passed through unchanged.
+ */
+
+int array_run_container_ixor(
+    array_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    const int result_code = array_run_container_xor(src_1, src_2, dst);
+    array_container_free(src_1);  // src_1 is consumed by this operation
+    return result_code;
+}
+
+/* Xor the run container src_1 with the array container src_2. Delegates to
+ * array_run_container_xor() with the operands swapped, frees src_1, and
+ * passes the helper's return value through unchanged. */
+int run_array_container_ixor(
+    run_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    const int result_code = array_run_container_xor(src_2, src_1, dst);
+    run_container_free(src_1);  // src_1 is consumed by this operation
+    return result_code;
+}
+
+/* Xor two array containers. The result is built by
+ * array_array_container_xor() into *dst, then src_1 is freed. Returns that
+ * helper's "result is a bitset" flag. */
+bool array_array_container_ixor(
+    array_container_t *src_1, const array_container_t *src_2,
+    container_t **dst
+){
+    const bool result_is_bitset = array_array_container_xor(src_1, src_2, dst);
+    array_container_free(src_1);  // src_1 is consumed by this operation
+    return result_is_bitset;
+}
+
+/* Xor two run containers. The result is built by run_run_container_xor()
+ * into *dst, then src_1 is freed. The helper's return value is passed
+ * through unchanged. */
+int run_run_container_ixor(
+    run_container_t *src_1, const run_container_t *src_2,
+    container_t **dst
+){
+    const int result_code = run_run_container_xor(src_1, src_2, dst);
+    run_container_free(src_1);  // src_1 is consumed by this operation
+    return result_code;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/mixed_xor.c */
+/* begin file src/containers/run.c */
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+/* Under the C99 inline model, each `extern inline` declaration below makes
+ * this translation unit provide the external (out-of-line) definition of the
+ * named function. The inline bodies are not visible in this part of the
+ * file -- presumably they come from the run container header. */
+extern inline uint16_t run_container_minimum(const run_container_t *run);
+extern inline uint16_t run_container_maximum(const run_container_t *run);
+extern inline int32_t interleavedBinarySearch(const rle16_t *array,
+ int32_t lenarray, uint16_t ikey);
+extern inline bool run_container_contains(const run_container_t *run,
+ uint16_t pos);
+extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
+extern inline bool run_container_is_full(const run_container_t *run);
+extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
+extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
+extern inline run_container_t *run_container_create_range(uint32_t start,
+ uint32_t stop);
+extern inline int run_container_cardinality(const run_container_t *run);
+
+/* Add the value `pos` to the run container `run`.
+ * Returns false when `pos` is already covered by an existing run and true
+ * when the container was modified. Depending on where `pos` falls, an
+ * existing run is extended by one value, two neighbouring runs are fused into
+ * one, or a new single-value run is inserted.
+ * NOTE(review): recoverRoomAtIndex() and makeRoomAtIndex() are defined
+ * elsewhere; from their use here they presumably remove / open one slot in
+ * run->runs at the given index and keep n_runs in sync -- confirm. */
+bool run_container_add(run_container_t *run, uint16_t pos) {
+ int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
+ if (index >= 0) return false; // already there
+ index = -index - 2; // points to preceding value, possibly -1
+ if (index >= 0) { // possible match
+ int32_t offset = pos - run->runs[index].value; // distance from the start of the preceding run
+ int32_t le = run->runs[index].length;
+ if (offset <= le) return false; // already there
+ if (offset == le + 1) { // pos is the value right after the preceding run
+ // we may need to fuse
+ if (index + 1 < run->n_runs) {
+ if (run->runs[index + 1].value == pos + 1) {
+ // indeed fusion is needed
+ run->runs[index].length = run->runs[index + 1].value +
+ run->runs[index + 1].length -
+ run->runs[index].value;
+ recoverRoomAtIndex(run, (uint16_t)(index + 1));
+ return true;
+ }
+ }
+ run->runs[index].length++; // nothing to fuse with: grow the preceding run by one
+ return true;
+ }
+ if (index + 1 < run->n_runs) {
+ // pos is separated from the preceding run; check the following run
+ if (run->runs[index + 1].value == pos + 1) {
+ // pos directly precedes the following run: prepend it
+ run->runs[index + 1].value = pos;
+ run->runs[index + 1].length = run->runs[index + 1].length + 1;
+ return true;
+ }
+ }
+ }
+ if (index == -1) { // pos is smaller than the start of every run
+ // we may need to extend the first run
+ if (0 < run->n_runs) {
+ if (run->runs[0].value == pos + 1) {
+ run->runs[0].length++;
+ run->runs[0].value--;
+ return true;
+ }
+ }
+ }
+ makeRoomAtIndex(run, (uint16_t)(index + 1)); // no adjacent run: insert a new one after `index`
+ run->runs[index + 1].value = pos;
+ run->runs[index + 1].length = 0; // a stored length of 0 is a run holding one value
+ return true;
+}
+
+/* Allocate an empty run container with room for `size` runs.
+ * For size <= 0 no run storage is allocated and `runs` is NULL.
+ * Returns NULL if either allocation fails. */
+run_container_t *run_container_create_given_capacity(int32_t size) {
+    run_container_t *container =
+        (run_container_t *)roaring_malloc(sizeof(run_container_t));
+    if (container == NULL) {
+        return NULL;
+    }
+
+    rle16_t *storage = NULL;
+    if (size > 0) {  // never depend on the behaviour of malloc(0)
+        storage = (rle16_t *)roaring_malloc(sizeof(rle16_t) * size);
+        if (storage == NULL) {
+            roaring_free(container);
+            return NULL;
+        }
+    }
+
+    container->runs = storage;
+    container->capacity = size;
+    container->n_runs = 0;
+    return container;
+}
+
+/* Release the unused tail of the run storage so that capacity == n_runs.
+ * Returns the number of run slots that were given back (0 when nothing
+ * changed).
+ *
+ * An empty container is handled by freeing the storage outright rather than
+ * calling realloc(p, 0): the behaviour of a zero-size realloc is
+ * implementation-defined, and an implementation that frees `p` and returns
+ * NULL would make a follow-up free of the old pointer a double free.
+ * If a shrinking realloc fails, the original buffer is still valid, so the
+ * container is left untouched and 0 is returned. */
+int run_container_shrink_to_fit(run_container_t *src) {
+    if (src->n_runs == src->capacity) return 0;  // nothing to do
+    const int savings = src->capacity - src->n_runs;
+
+    if (src->n_runs == 0) {
+        if (src->runs != NULL) {
+            roaring_free(src->runs);
+            src->runs = NULL;
+        }
+        src->capacity = 0;
+        return savings;
+    }
+
+    rle16_t *shrunk =
+        (rle16_t *)roaring_realloc(src->runs, src->n_runs * sizeof(rle16_t));
+    if (shrunk == NULL) {
+        // realloc failure leaves the old block intact: keep using it
+        return 0;
+    }
+    src->runs = shrunk;
+    src->capacity = src->n_runs;
+    return savings;
+}
+/* Allocate an empty run container with the default initial capacity
+ * (RUN_DEFAULT_INIT_SIZE runs). Returns NULL on allocation failure. */
+run_container_t *run_container_create(void) {
+    const int32_t initial_capacity = RUN_DEFAULT_INIT_SIZE;
+    return run_container_create_given_capacity(initial_capacity);
+}
+
+/* Return a newly allocated copy of `src` with the same capacity and the same
+ * runs, or NULL if the allocation fails. The caller owns the result. */
+run_container_t *run_container_clone(const run_container_t *src) {
+    run_container_t *run = run_container_create_given_capacity(src->capacity);
+    if (run == NULL) return NULL;
+    run->capacity = src->capacity;
+    run->n_runs = src->n_runs;
+    // A container created with capacity 0 has runs == NULL; passing a NULL
+    // pointer to memcpy is undefined even for a zero-byte copy, so skip it.
+    if (src->n_runs > 0) {
+        memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
+    }
+    return run;
+}
+
+/* Add `offset` to every value of the run container `c`, splitting the result
+ * at the 16-bit boundary.
+ *
+ * Values that still fit in 16 bits after the shift go to a new container
+ * stored in *loc; values that overflow go (wrapped around) to a new container
+ * stored in *hic. Either output pointer may be NULL to skip that half. An
+ * output is only written when its half is non-empty and its allocation
+ * succeeded; otherwise *loc / *hic is left untouched, so callers should
+ * initialise them to NULL.
+ *
+ * NOTE(review): `top` is narrowed to uint16_t when passed to
+ * run_container_index_equalorlarger(), so offset == 0 (top == 65536) is not
+ * handled here; callers presumably special-case a zero offset -- confirm. */
+void run_container_offset(const run_container_t *c,
+                          container_t **loc, container_t **hic,
+                          uint16_t offset) {
+    run_container_t *lo = NULL, *hi = NULL;
+
+    bool split;
+    int lo_cap, hi_cap;
+    int top, pivot;
+
+    // smallest original value whose shifted value no longer fits in 16 bits
+    top = (1 << 16) - offset;
+    // presumably: index of the run containing `top`, else of the first run
+    // after it, else -1
+    pivot = run_container_index_equalorlarger(c, top);
+
+    if (pivot == -1) {
+        // every run lies below the boundary
+        split = false;
+        lo_cap = c->n_runs;
+        hi_cap = 0;
+    } else {
+        // The pivot run straddles the boundary only when it starts strictly
+        // below `top`. A run that starts exactly at `top` belongs entirely to
+        // the high half; treating it as split would copy it into `lo`, where
+        // its shifted start wraps to 0 and it becomes a bogus full run.
+        split = c->runs[pivot].value < top;
+        lo_cap = pivot + (split ? 1 : 0);
+        hi_cap = c->n_runs - pivot;
+    }
+
+    if (loc && lo_cap) {
+        lo = run_container_create_given_capacity(lo_cap);
+        if (lo != NULL) {
+            memcpy(lo->runs, c->runs, lo_cap*sizeof(rle16_t));
+            lo->n_runs = lo_cap;
+            int i; for (i = 0; i < lo_cap; ++i) {
+                lo->runs[i].value += offset;
+            }
+            *loc = (container_t*)lo;
+        }
+    }
+
+    if (hic && hi_cap) {
+        hi = run_container_create_given_capacity(hi_cap);
+        if (hi != NULL) {
+            memcpy(hi->runs, c->runs+pivot, hi_cap*sizeof(rle16_t));
+            hi->n_runs = hi_cap;
+            int i; for (i = 0; i < hi_cap; ++i) {
+                // uint16_t arithmetic: values wrap around past 65535
+                hi->runs[i].value += offset;
+            }
+            *hic = (container_t*)hi;
+        }
+    }
+
+    // Fix the split.
+    if (split) {
+        if (lo != NULL) {
+            // Add the missing run to 'lo', exhausting length.
+            lo->runs[lo->n_runs-1].length = (1 << 16) - lo->runs[lo->n_runs-1].value - 1;
+        }
+
+        if (hi != NULL) {
+            // Fix the first run in 'hi': drop the part that stayed in 'lo'
+            // and restart it at 0.
+            hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1;
+            hi->runs[0].value = 0;
+        }
+    }
+}
+
+/* Release a run container: first its run storage (if any), then the
+ * container structure itself. */
+void run_container_free(run_container_t *run) {
+    rle16_t *storage = run->runs;
+    // Jon Strabala reports that some tools complain about freeing NULL
+    if (storage != NULL) {
+        roaring_free(storage);
+        run->runs = NULL;  // pedantic
+    }
+    roaring_free(run);
+}
+
+/* Enlarge the run storage of `run` to at least `min` runs.
+ * The new capacity follows a growth schedule (default size when empty, then
+ * x2 below 64, x1.5 below 1024, x1.25 above) and is raised to `min` if that
+ * is larger. With `copy` true the existing runs are preserved (realloc);
+ * otherwise the old storage is discarded and fresh storage is allocated.
+ * Allocation failure is reported on stderr and trips an assertion. */
+void run_container_grow(run_container_t *run, int32_t min, bool copy) {
+    const int32_t current = run->capacity;
+    int32_t grown;
+    if (current == 0) {
+        grown = RUN_DEFAULT_INIT_SIZE;
+    } else if (current < 64) {
+        grown = current * 2;
+    } else if (current < 1024) {
+        grown = current * 3 / 2;
+    } else {
+        grown = current * 5 / 4;
+    }
+    if (grown < min) {
+        grown = min;
+    }
+    run->capacity = grown;
+    assert(run->capacity >= min);
+
+    if (!copy) {
+        // Jon Strabala reports that some tools complain otherwise
+        if (run->runs != NULL) {
+            roaring_free(run->runs);
+        }
+        run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
+    } else {
+        rle16_t *previous = run->runs;
+        run->runs = (rle16_t *)roaring_realloc(
+            previous, run->capacity * sizeof(rle16_t));
+        if (run->runs == NULL) {
+            roaring_free(previous);
+        }
+    }
+
+    // handle the case where the allocation failed
+    if (run->runs == NULL) {
+        fprintf(stderr, "could not allocate memory\n");
+    }
+    assert(run->runs != NULL);
+}
+
+/* Overwrite `dst` with the runs of `src`, growing dst's storage first when it
+ * is too small. */
+void run_container_copy(const run_container_t *src, run_container_t *dst) {
+    const int32_t count = src->n_runs;
+    if (dst->capacity < count) {
+        // dst's old contents are about to be replaced: no need to keep them
+        run_container_grow(dst, count, false);
+    }
+    memcpy(dst->runs, src->runs, sizeof(rle16_t) * count);
+    dst->n_runs = count;
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `dst'.
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'.
+ *
+ * If either input is full, that input is simply copied into `dst'. Otherwise
+ * the runs of both inputs are visited in order of increasing start value and
+ * handed to run_container_append_first() / run_container_append() (defined
+ * elsewhere; presumably they merge a run into the previously appended one
+ * when the two touch or overlap).
+ * NOTE(review): on the non-full path the first run of BOTH inputs is read
+ * without checking n_runs, so both inputs must contain at least one run. */
+void run_container_union(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst) {
+ // TODO: this could be a lot more efficient
+
+ // we start out with inexpensive checks
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ run_container_copy(src_1, dst);
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_2, dst);
+ return;
+ }
+ }
+ const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; // worst case: no two runs merge
+ if (dst->capacity < neededcapacity)
+ run_container_grow(dst, neededcapacity, false);
+ dst->n_runs = 0;
+ int32_t rlepos = 0; // next unread run of src_1
+ int32_t xrlepos = 0; // next unread run of src_2
+
+ rle16_t previousrle; // last run appended so far, threaded through the append calls
+ if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
+ previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
+ rlepos++;
+ } else {
+ previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
+ xrlepos++;
+ }
+
+ while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
+ rle16_t newrl;
+ if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { // take whichever run starts first
+ newrl = src_1->runs[rlepos];
+ rlepos++;
+ } else {
+ newrl = src_2->runs[xrlepos];
+ xrlepos++;
+ }
+ run_container_append(dst, newrl, &previousrle);
+ }
+ while (xrlepos < src_2->n_runs) { // leftover runs of src_2
+ run_container_append(dst, src_2->runs[xrlepos], &previousrle);
+ xrlepos++;
+ }
+ while (rlepos < src_1->n_runs) { // leftover runs of src_1
+ run_container_append(dst, src_1->runs[rlepos], &previousrle);
+ rlepos++;
+ }
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `src_1'.
+ *
+ * If `src_1' is full nothing is done; if `src_2' is full it is copied into
+ * `src_1'. Otherwise the existing runs of `src_1' are first moved to index
+ * `maxoutput' of its own (enlarged) buffer, and the merged output is then
+ * written from index 0. Since at most `maxoutput' runs are produced, the
+ * output should never reach the relocated input.
+ * NOTE(review): on the merge path the first run of BOTH inputs is read
+ * without checking n_runs, so both inputs must contain at least one run.
+ */
+void run_container_union_inplace(run_container_t *src_1,
+ const run_container_t *src_2) {
+ // TODO: this could be a lot more efficient
+
+ // we start out with inexpensive checks
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_2, src_1);
+ return;
+ }
+ }
+ // we move the data to the end of the current array
+ const int32_t maxoutput = src_1->n_runs + src_2->n_runs; // worst case: no two runs merge
+ const int32_t neededcapacity = maxoutput + src_1->n_runs; // output area + relocated copy of src_1
+ if (src_1->capacity < neededcapacity)
+ run_container_grow(src_1, neededcapacity, true);
+ memmove(src_1->runs + maxoutput, src_1->runs,
+ src_1->n_runs * sizeof(rle16_t));
+ rle16_t *inputsrc1 = src_1->runs + maxoutput; // relocated original runs of src_1
+ const int32_t input1nruns = src_1->n_runs;
+ src_1->n_runs = 0; // src_1 now acts as the (empty) output
+ int32_t rlepos = 0; // next unread run of inputsrc1
+ int32_t xrlepos = 0; // next unread run of src_2
+
+ rle16_t previousrle; // last run appended so far, threaded through the append calls
+ if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
+ previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
+ rlepos++;
+ } else {
+ previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
+ xrlepos++;
+ }
+ while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
+ rle16_t newrl;
+ if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { // take whichever run starts first
+ newrl = inputsrc1[rlepos];
+ rlepos++;
+ } else {
+ newrl = src_2->runs[xrlepos];
+ xrlepos++;
+ }
+ run_container_append(src_1, newrl, &previousrle);
+ }
+ while (xrlepos < src_2->n_runs) { // leftover runs of src_2
+ run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
+ xrlepos++;
+ }
+ while (rlepos < input1nruns) { // leftover original runs of src_1
+ run_container_append(src_1, inputsrc1[rlepos], &previousrle);
+ rlepos++;
+ }
+}
+
+/* Write the symmetric difference of `src_1' and `src_2' to `dst'.
+ * `dst' must be distinct from both inputs. The runs of both inputs are
+ * visited in order of increasing start value (src_1 first on ties) and fed
+ * one by one to run_container_smart_append_exclusive(). */
+void run_container_xor(const run_container_t *src_1,
+                       const run_container_t *src_2, run_container_t *dst) {
+    // xor with a full range is not special-cased: it would amount to a
+    // negation, which is implemented in much the same way
+
+    const int32_t capacity_needed = src_1->n_runs + src_2->n_runs;
+    if (capacity_needed > dst->capacity) {
+        run_container_grow(dst, capacity_needed, false);
+    }
+    dst->n_runs = 0;
+
+    int32_t i1 = 0;
+    int32_t i2 = 0;
+    while ((i1 < src_1->n_runs) || (i2 < src_2->n_runs)) {
+        // Take from src_1 when src_2 is exhausted, or when both still have
+        // runs and src_1's next run does not start after src_2's.
+        const bool take_first =
+            (i2 >= src_2->n_runs) ||
+            ((i1 < src_1->n_runs) &&
+             (src_1->runs[i1].value <= src_2->runs[i2].value));
+        const rle16_t *next =
+            take_first ? &src_1->runs[i1++] : &src_2->runs[i2++];
+        run_container_smart_append_exclusive(dst, next->value, next->length);
+    }
+}
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2.
+ *
+ * If one input is full, the other one is copied into dst. Otherwise both run
+ * lists are walked in parallel; [start, end) and [xstart, xend) are the
+ * current runs of src_1 and src_2 with exclusive ends, and every overlap is
+ * emitted as one run of dst.
+ * NOTE(review): on the non-full path the first run of BOTH inputs is read
+ * before the loop without checking n_runs, so both inputs must contain at
+ * least one run. */
+void run_container_intersection(const run_container_t *src_1,
+ const run_container_t *src_2,
+ run_container_t *dst) {
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ run_container_copy(src_2, dst);
+ return;
+ }
+ if (if2) {
+ run_container_copy(src_1, dst);
+ return;
+ }
+ }
+ // TODO: this could be a lot more efficient, could use SIMD optimizations
+ const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
+ if (dst->capacity < neededcapacity)
+ run_container_grow(dst, neededcapacity, false);
+ dst->n_runs = 0;
+ int32_t rlepos = 0; // current run of src_1
+ int32_t xrlepos = 0; // current run of src_2
+ int32_t start = src_1->runs[rlepos].value;
+ int32_t end = start + src_1->runs[rlepos].length + 1; // exclusive end
+ int32_t xstart = src_2->runs[xrlepos].value;
+ int32_t xend = xstart + src_2->runs[xrlepos].length + 1; // exclusive end
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
+ if (end <= xstart) { // src_1's run lies entirely before src_2's: skip it
+ ++rlepos;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ } else if (xend <= start) { // src_2's run lies entirely before src_1's: skip it
+ ++xrlepos;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else { // they overlap
+ const int32_t lateststart = start > xstart ? start : xstart;
+ int32_t earliestend;
+ if (end == xend) { // improbable
+ earliestend = end;
+ rlepos++;
+ xrlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else if (end < xend) { // src_1's run ends first: advance src_1 only
+ earliestend = end;
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+
+ } else { // end > xend
+ earliestend = xend;
+ xrlepos++;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ }
+ dst->runs[dst->n_runs].value = (uint16_t)lateststart; // overlap is [lateststart, earliestend)
+ dst->runs[dst->n_runs].length =
+ (uint16_t)(earliestend - lateststart - 1);
+ dst->n_runs++;
+ }
+ }
+}
+
+/* Compute the size of the intersection of src_1 and src_2 .
+ *
+ * If one input is full, the answer is the cardinality of the other one.
+ * Otherwise both run lists are walked in parallel with the same overlap
+ * logic as run_container_intersection(), but only the number of values in
+ * each overlap is accumulated; no container is built.
+ * NOTE(review): on the non-full path the first run of BOTH inputs is read
+ * before the loop without checking n_runs, so both inputs must contain at
+ * least one run. */
+int run_container_intersection_cardinality(const run_container_t *src_1,
+ const run_container_t *src_2) {
+ const bool if1 = run_container_is_full(src_1);
+ const bool if2 = run_container_is_full(src_2);
+ if (if1 || if2) {
+ if (if1) {
+ return run_container_cardinality(src_2);
+ }
+ if (if2) {
+ return run_container_cardinality(src_1);
+ }
+ }
+ int answer = 0;
+ int32_t rlepos = 0; // current run of src_1
+ int32_t xrlepos = 0; // current run of src_2
+ int32_t start = src_1->runs[rlepos].value;
+ int32_t end = start + src_1->runs[rlepos].length + 1; // exclusive end
+ int32_t xstart = src_2->runs[xrlepos].value;
+ int32_t xend = xstart + src_2->runs[xrlepos].length + 1; // exclusive end
+ while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
+ if (end <= xstart) { // src_1's run lies entirely before src_2's: skip it
+ ++rlepos;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ } else if (xend <= start) { // src_2's run lies entirely before src_1's: skip it
+ ++xrlepos;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else { // they overlap
+ const int32_t lateststart = start > xstart ? start : xstart;
+ int32_t earliestend;
+ if (end == xend) { // improbable
+ earliestend = end;
+ rlepos++;
+ xrlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ } else if (end < xend) { // src_1's run ends first: advance src_1 only
+ earliestend = end;
+ rlepos++;
+ if (rlepos < src_1->n_runs) {
+ start = src_1->runs[rlepos].value;
+ end = start + src_1->runs[rlepos].length + 1;
+ }
+
+ } else { // end > xend
+ earliestend = xend;
+ xrlepos++;
+ if (xrlepos < src_2->n_runs) {
+ xstart = src_2->runs[xrlepos].value;
+ xend = xstart + src_2->runs[xrlepos].length + 1;
+ }
+ }
+ answer += earliestend - lateststart; // number of values in [lateststart, earliestend)
+ }
+ }
+ return answer;
+}
+
+/* Report whether src_1 and src_2 share at least one value.
+ * If one input is full, the answer is whether the other one is non-empty.
+ * Otherwise both run lists are walked in parallel until two runs overlap or
+ * one list is exhausted. */
+bool run_container_intersect(const run_container_t *src_1,
+                             const run_container_t *src_2) {
+    if (run_container_is_full(src_1)) {
+        return !run_container_empty(src_2);
+    }
+    if (run_container_is_full(src_2)) {
+        return !run_container_empty(src_1);
+    }
+
+    int32_t i1 = 0;
+    int32_t i2 = 0;
+    // current run of each input as a half-open interval [begin, stop)
+    int32_t begin1 = src_1->runs[i1].value;
+    int32_t stop1 = begin1 + src_1->runs[i1].length + 1;
+    int32_t begin2 = src_2->runs[i2].value;
+    int32_t stop2 = begin2 + src_2->runs[i2].length + 1;
+
+    while ((i1 < src_1->n_runs) && (i2 < src_2->n_runs)) {
+        if ((stop1 > begin2) && (stop2 > begin1)) {
+            return true;  // the two current runs overlap
+        }
+        if (stop1 <= begin2) {
+            // src_1's run lies entirely before src_2's: move on in src_1
+            ++i1;
+            if (i1 < src_1->n_runs) {
+                begin1 = src_1->runs[i1].value;
+                stop1 = begin1 + src_1->runs[i1].length + 1;
+            }
+        } else {
+            // src_2's run lies entirely before src_1's: move on in src_2
+            ++i2;
+            if (i2 < src_2->n_runs) {
+                begin2 = src_2->runs[i2].value;
+                stop2 = begin2 + src_2->runs[i2].length + 1;
+            }
+        }
+    }
+    return false;
+}
+
+
+/* Compute the difference of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2.
+ *
+ * [start, end) is the not-yet-emitted remainder of src_1's current run and
+ * [start2, end2) is src_2's current run, both with exclusive ends. Parts of
+ * src_1's run that are not covered by src_2 are emitted to dst.
+ * NOTE(review): the first run of BOTH inputs is read before the loop without
+ * checking n_runs, so both inputs must contain at least one run. */
+void run_container_andnot(const run_container_t *src_1,
+ const run_container_t *src_2, run_container_t *dst) {
+ // following Java implementation as of June 2016
+
+ if (dst->capacity < src_1->n_runs + src_2->n_runs)
+ run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
+
+ dst->n_runs = 0;
+
+ int rlepos1 = 0;
+ int rlepos2 = 0;
+ int32_t start = src_1->runs[rlepos1].value;
+ int32_t end = start + src_1->runs[rlepos1].length + 1;
+ int32_t start2 = src_2->runs[rlepos2].value;
+ int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
+
+ while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
+ if (end <= start2) {
+ // output the first run
+ dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ start = src_1->runs[rlepos1].value;
+ end = start + src_1->runs[rlepos1].length + 1;
+ }
+ } else if (end2 <= start) {
+ // exit the second run
+ rlepos2++;
+ if (rlepos2 < src_2->n_runs) {
+ start2 = src_2->runs[rlepos2].value;
+ end2 = start2 + src_2->runs[rlepos2].length + 1;
+ }
+ } else { // the two runs overlap
+ if (start < start2) { // keep the part of src_1's run that precedes src_2's run
+ dst->runs[dst->n_runs++] =
+ MAKE_RLE16(start, start2 - start - 1);
+ }
+ if (end2 < end) {
+ start = end2; // src_1's run continues past src_2's run: keep its tail for later
+ } else { // src_1's run is fully consumed
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ start = src_1->runs[rlepos1].value;
+ end = start + src_1->runs[rlepos1].length + 1;
+ }
+ }
+ }
+ }
+ if (rlepos1 < src_1->n_runs) { // src_2 is exhausted: the rest of src_1 is kept as is
+ dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1);
+ rlepos1++;
+ if (rlepos1 < src_1->n_runs) {
+ memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
+ sizeof(rle16_t) * (src_1->n_runs - rlepos1));
+ dst->n_runs += src_1->n_runs - rlepos1;
+ }
+ }
+}
+
+ALLOW_UNALIGNED
+/* Expand every run of `cont` into individual 32-bit values (`base` plus the
+ * 16-bit value) written consecutively to `vout`. Returns the number of
+ * values written. Values are stored with memcpy, so `vout` does not have to
+ * be aligned for uint32_t. */
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+                                  uint32_t base) {
+    uint32_t *const out = (uint32_t *)vout;
+    int written = 0;
+    int r;
+    for (r = 0; r < cont->n_runs; ++r) {
+        const uint32_t first = base + cont->runs[r].value;
+        const uint32_t span = cont->runs[r].length;  // run holds span + 1 values
+        uint32_t k;
+        for (k = 0; k <= span; ++k) {
+            const uint32_t value = first + k;
+            // should be compiled as a MOV on x64
+            memcpy(out + written, &value, sizeof(value));
+            written++;
+        }
+    }
+    return written;
+}
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/*
+ * Debugging aid: print each run of the container to stdout as
+ * "[first,last]", with no separator between runs.
+ */
+void run_container_printf(const run_container_t *cont) {
+    int r;
+    for (r = 0; r < cont->n_runs; ++r) {
+        const rle16_t *rle = &cont->runs[r];
+        const uint16_t first = rle->value;
+        printf("[%d,%d]", first, first + rle->length);
+    }
+}
+
+/*
+ * Debugging aid: print every value of the container to stdout as a
+ * comma-separated list of 32-bit integers, each being `base` plus the stored
+ * 16-bit value. Prints nothing for an empty container.
+ */
+void run_container_printf_as_uint32_array(const run_container_t *cont,
+                                          uint32_t base) {
+    int32_t r;
+    for (r = 0; r < cont->n_runs; ++r) {
+        const uint32_t first = base + cont->runs[r].value;
+        const uint16_t span = cont->runs[r].length;
+        uint32_t k = 0;
+        if (r == 0) {
+            // the very first value is the only one without a leading comma
+            printf("%u", first);
+            k = 1;
+        }
+        for (; k <= span; ++k) {
+            printf(",%u", first + k);
+        }
+    }
+}
+#endif
+
+/* Serialize the container into `buf`: a 16-bit run count followed by the raw
+ * run array. Returns run_container_size_in_bytes(container). The caller must
+ * provide a large enough buffer. */
+int32_t run_container_write(const run_container_t *container, char *buf) {
+    const uint16_t run_count = container->n_runs;
+    char *cursor = buf;
+
+    memcpy(cursor, &run_count, sizeof(run_count));
+    cursor += sizeof(run_count);
+    memcpy(cursor, container->runs, container->n_runs * sizeof(rle16_t));
+
+    return run_container_size_in_bytes(container);
+}
+
+/* Deserialize a container from `buf`: a 16-bit run count followed by that
+ * many raw runs. The container's storage is grown when needed. `cardinality`
+ * is ignored. Returns run_container_size_in_bytes(container). */
+int32_t run_container_read(int32_t cardinality, run_container_t *container,
+                           const char *buf) {
+    (void)cardinality;  // unused
+
+    uint16_t run_count;
+    memcpy(&run_count, buf, sizeof(run_count));
+    container->n_runs = run_count;
+
+    if (container->capacity < container->n_runs) {
+        run_container_grow(container, container->n_runs, false);
+    }
+    if (container->n_runs > 0) {
+        const char *payload = buf + sizeof(run_count);
+        memcpy(container->runs, payload, container->n_runs * sizeof(rle16_t));
+    }
+    return run_container_size_in_bytes(container);
+}
+
+/* Call `iterator(value, ptr)` for every value of the container in increasing
+ * order, where value is `base` plus the stored 16-bit value. Stops and
+ * returns false as soon as the callback returns false; returns true when all
+ * values were visited. */
+bool run_container_iterate(const run_container_t *cont, uint32_t base,
+                           roaring_iterator iterator, void *ptr) {
+    int r;
+    for (r = 0; r < cont->n_runs; ++r) {
+        const uint32_t first = base + cont->runs[r].value;
+        const uint32_t span = cont->runs[r].length;  // run holds span + 1 values
+        uint32_t k;
+        for (k = 0; k <= span; ++k) {
+            if (!iterator(first + k, ptr)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+/* 64-bit variant of run_container_iterate(): each value (`base` plus the
+ * stored 16-bit value) is widened to 64 bits and OR-ed with `high_bits`
+ * before being passed to `iterator`. Stops and returns false as soon as the
+ * callback returns false; returns true otherwise. */
+bool run_container_iterate64(const run_container_t *cont, uint32_t base,
+                             roaring_iterator64 iterator, uint64_t high_bits,
+                             void *ptr) {
+    int r;
+    for (r = 0; r < cont->n_runs; ++r) {
+        const uint32_t first = base + cont->runs[r].value;
+        const uint32_t span = cont->runs[r].length;  // run holds span + 1 values
+        uint32_t k;
+        for (k = 0; k <= span; ++k) {
+            const uint64_t low_bits = (uint64_t)(first + k);
+            if (!iterator(high_bits | low_bits, ptr)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+/* Return true when every value of container1 is also in container2.
+ * Both run lists are walked in parallel; the walk fails as soon as a run of
+ * container1 starts before the current run of container2, and succeeds when
+ * all runs of container1 have been consumed. */
+bool run_container_is_subset(const run_container_t *container1,
+                             const run_container_t *container2) {
+    int idx1 = 0;
+    int idx2 = 0;
+    while ((idx1 < container1->n_runs) && (idx2 < container2->n_runs)) {
+        const rle16_t *inner = &container1->runs[idx1];
+        const rle16_t *outer = &container2->runs[idx2];
+        // inclusive last value of each run
+        const int inner_last = inner->value + inner->length;
+        const int outer_last = outer->value + outer->length;
+
+        if (inner->value < outer->value) {
+            return false;
+        }
+        // Advance whichever run ends first; both when they end together.
+        if (inner_last <= outer_last) {
+            idx1++;
+        }
+        if (inner_last >= outer_last) {
+            idx2++;
+        }
+    }
+    return idx1 == container1->n_runs;
+}
+
+// TODO: write smart_append_exclusive version to match the overloaded 1 param
+// Java version (or is it even used?)
+/* Append the run [start, start + length] to `src` with exclusive-or
+ * semantics relative to the last run already stored: values covered by both
+ * are removed, values covered by only one are kept.
+ * The function writes to src->runs[src->n_runs] without checking capacity,
+ * so the caller must have reserved room for one more run.
+ * NOTE(review): the case analysis only looks at the last stored run, so
+ * `start` is presumably required to be >= that run's start -- confirm with
+ * the callers.
+ */
+// follows the Java implementation closely
+// length is the rle-value. Ie, run [10,12) uses a length value 1.
+void run_container_smart_append_exclusive(run_container_t *src,
+ const uint16_t start,
+ const uint16_t length) {
+ int old_end; // exclusive end of the last stored run; only set when src is non-empty
+ rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
+ rle16_t *appended_last_run = src->runs + src->n_runs; // slot a newly appended run would use
+
+ if (!src->n_runs ||
+ (start > (old_end = last_run->value + last_run->length + 1))) { // empty, or a gap before the new run
+ *appended_last_run = MAKE_RLE16(start, length);
+ src->n_runs++;
+ return;
+ }
+ if (old_end == start) {
+ // we merge
+ last_run->length += (length + 1);
+ return;
+ }
+ int new_end = start + length + 1; // exclusive end of the new run
+
+ if (start == last_run->value) {
+ // wipe out previous
+ if (new_end < old_end) { // new run is a prefix of the last run: keep the last run's tail
+ *last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
+ return;
+ } else if (new_end > old_end) { // new run extends past the last run: keep the new run's tail
+ *last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
+ return;
+ } else { // identical runs cancel out
+ src->n_runs--;
+ return;
+ }
+ }
+ last_run->length = start - last_run->value - 1; // keep only the part of the last run before `start`
+ if (new_end < old_end) { // new run ends inside the last run: the last run's tail survives
+ *appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1);
+ src->n_runs++;
+ } else if (new_end > old_end) { // new run ends after the last run: the new run's tail survives
+ *appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1);
+ src->n_runs++;
+ }
+}
+
+/* Look for the element of rank `rank`, where `*start_rank` is the rank of the
+ * container's first value. On success the element is stored in `*element`
+ * and true is returned. Otherwise `*start_rank` has been advanced by the
+ * number of values in the container and false is returned. */
+bool run_container_select(const run_container_t *container,
+                          uint32_t *start_rank, uint32_t rank,
+                          uint32_t *element) {
+    int r;
+    for (r = 0; r < container->n_runs; r++) {
+        const uint16_t span = container->runs[r].length;
+        if (rank > *start_rank + span) {
+            // target lies beyond this run: skip its span + 1 values
+            *start_rank += span + 1;
+            continue;
+        }
+        *element = container->runs[r].value + rank - (*start_rank);
+        return true;
+    }
+    return false;
+}
+
+/* Return the number of values in the container that are smaller than or
+ * equal to `x`. */
+int run_container_rank(const run_container_t *container, uint16_t x) {
+    const uint32_t target = x;
+    int preceding = 0;  // values counted in the runs lying entirely below x
+    int r;
+    for (r = 0; r < container->n_runs; r++) {
+        const uint32_t first = container->runs[r].value;
+        const uint32_t span = container->runs[r].length;
+        const uint32_t last = first + span;
+        if (target > last) {
+            preceding += span + 1;  // whole run is below x
+            continue;
+        }
+        if (target < first) {
+            break;  // x falls in the gap before this run
+        }
+        // x lies inside this run
+        return preceding + (target - first) + 1;
+    }
+    return preceding;
+}
+
+#ifdef CROARING_IS_X64
+
+CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
+/* Get the cardinality of `run'. Requires an actual computation.
+ * AVX2 variant: the cardinality is n_runs plus the sum of all stored run
+ * lengths. Runs are loaded `step' at a time as 32-bit lanes and the length
+ * of each run is extracted by shifting every lane right by 16 bits.
+ * NOTE(review): this relies on rle16_t being two 16-bit fields with `length'
+ * in the upper half of each 32-bit lane (little-endian, value first), and on
+ * `step' being 8 to match the eight buffer entries summed below -- confirm
+ * against the rle16_t definition. */
+static inline int _avx2_run_container_cardinality(const run_container_t *run) {
+ const int32_t n_runs = run->n_runs;
+ const rle16_t *runs = run->runs;
+
+ /* by initializing with n_runs, we omit counting the +1 for each pair. */
+ int sum = n_runs;
+ int32_t k = 0;
+ const int32_t step = sizeof(__m256i) / sizeof(rle16_t); // runs per 256-bit vector
+ if (n_runs > step) {
+ __m256i total = _mm256_setzero_si256(); // per-lane running sums of lengths
+ for (; k + step <= n_runs; k += step) {
+ __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); // unaligned load of `step' runs
+ __m256i justlengths = _mm256_srli_epi32(ymm1, 16); // keep the upper 16 bits of every 32-bit lane
+ total = _mm256_add_epi32(total, justlengths);
+ }
+ // a store might be faster than extract?
+ uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
+ _mm256_storeu_si256((__m256i *)buffer, total);
+ sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+ (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+ }
+ for (; k < n_runs; ++k) { // scalar tail (or everything, when n_runs <= step)
+ sum += runs[k].length;
+ }
+
+ return sum;
+}
+
+CROARING_UNTARGET_REGION
+
+/* Scalar cardinality of `run': every run holds (length + 1) values, so the
+ * total is n_runs plus the sum of all stored lengths. */
+static inline int _scalar_run_container_cardinality(const run_container_t *run) {
+    int32_t remaining = run->n_runs;
+    /* starting from n_runs accounts for the "+1" of every run up front */
+    int total = remaining;
+    while (remaining-- > 0) {
+        total += run->runs[remaining].length;
+    }
+    return total;
+}
+
+/* Get the cardinality of `run', using the AVX2 implementation when
+ * croaring_avx2() reports it as usable and the scalar one otherwise. */
+int run_container_cardinality(const run_container_t *run) {
+    return croaring_avx2() ? _avx2_run_container_cardinality(run)
+                           : _scalar_run_container_cardinality(run);
+}
+#else
+
+/* Get the cardinality of `run' (non-x64 build). Every run holds
+ * (length + 1) values, so the total is n_runs plus the sum of all stored
+ * lengths. */
+int run_container_cardinality(const run_container_t *run) {
+    const int32_t run_count = run->n_runs;
+    /* starting from the run count accounts for the "+1" of every run */
+    int total = run_count;
+    int32_t idx = 0;
+    while (idx < run_count) {
+        total += run->runs[idx].length;
+        ++idx;
+    }
+    return total;
+}
+#endif
+
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/containers/run.c */
+/* begin file src/memory.c */
+#include <stdlib.h>
+
+// without the following, we get lots of warnings about posix_memalign
+#ifndef __cplusplus
+extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
+#endif //__cplusplus // C++ does not have a well defined signature
+
+// Portable aligned allocation: returns `size` bytes aligned to `alignment`,
+// or NULL on failure. Uses the platform's native aligned allocator.
+static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
+#ifdef _MSC_VER
+    return _aligned_malloc(size, alignment);
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+    return __mingw_aligned_malloc(size, alignment);
+#else
+    // somehow, if this is used before including "x86intrin.h", it creates an
+    // implicit defined warning.
+    void *block;
+    const int status = posix_memalign(&block, alignment, size);
+    if (status != 0) {
+        return NULL;
+    }
+    return block;
+#endif
+}
+/* Release a block obtained from roaring_bitmap_aligned_malloc(), using the
+ * deallocator that matches the allocator chosen for the same platform. */
+static void roaring_bitmap_aligned_free(void *memblock) {
+#ifdef _MSC_VER
+ _aligned_free(memblock);
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ __mingw_aligned_free(memblock);
+#else
+ free(memblock); // posix_memalign() memory is released with plain free()
+#endif
+}
+/* Table of allocator callbacks used by the roaring_* memory wrappers in this
+ * file. It starts out pointing at the C library allocator plus the portable
+ * aligned helpers defined above, and can be replaced as a whole through
+ * roaring_init_memory_hook(). */
+static roaring_memory_t global_memory_hook = {
+ .malloc = malloc,
+ .realloc = realloc,
+ .calloc = calloc,
+ .free = free,
+ .aligned_malloc = roaring_bitmap_aligned_malloc,
+ .aligned_free = roaring_bitmap_aligned_free,
+};
+/* Replace the whole allocator table with `memory_hook` (copied by value).
+ * All later roaring_malloc()/roaring_free()/... calls go through the new
+ * callbacks. No synchronisation is performed here. */
+void roaring_init_memory_hook(roaring_memory_t memory_hook) {
+ global_memory_hook = memory_hook;
+}
+
+/* Allocate `n` bytes through the currently installed malloc hook. */
+void* roaring_malloc(size_t n) {
+    void *block = global_memory_hook.malloc(n);
+    return block;
+}
+
+/* Resize the block `p` to `new_sz` bytes through the currently installed
+ * realloc hook. */
+void* roaring_realloc(void* p, size_t new_sz) {
+    void *block = global_memory_hook.realloc(p, new_sz);
+    return block;
+}
+
+/* Allocate an array of `n_elements` items of `element_size` bytes each
+ * through the currently installed calloc hook. */
+void* roaring_calloc(size_t n_elements, size_t element_size) {
+    void *block = global_memory_hook.calloc(n_elements, element_size);
+    return block;
+}
+
+/** Hands block p to the free callback of the installed memory hook. */
+void roaring_free(void* p) {
+    global_memory_hook.free(p);
+}
+
+/**
+ * Forwards an aligned allocation request (alignment first, then size) to
+ * the installed memory hook.
+ */
+void* roaring_aligned_malloc(size_t alignment, size_t size) {
+    void *block = global_memory_hook.aligned_malloc(alignment, size);
+    return block;
+}
+
+/** Hands block p to the aligned_free callback of the installed memory hook. */
+void roaring_aligned_free(void* p) {
+    global_memory_hook.aligned_free(p);
+}
+/* end file src/memory.c */
+/* begin file src/roaring.c */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" { namespace roaring { namespace api {
+#endif
+
+#define CROARING_SERIALIZATION_ARRAY_UINT32 1
+#define CROARING_SERIALIZATION_CONTAINER 2
+
+extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
+extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
+
+/** True when ROARING_FLAG_COW is set in the bitmap's container-array flags. */
+static inline bool is_cow(const roaring_bitmap_t *r) {
+    const roaring_array_t *ra = &r->high_low_container;
+    return (ra->flags & ROARING_FLAG_COW) != 0;
+}
+/** True when ROARING_FLAG_FROZEN is set in the bitmap's container-array flags. */
+static inline bool is_frozen(const roaring_bitmap_t *r) {
+    const roaring_array_t *ra = &r->high_low_container;
+    return (ra->flags & ROARING_FLAG_FROZEN) != 0;
+}
+
+/**
+ * Like roaring_bitmap_add, but also reports which container was touched so
+ * that repeated insertions into the same container can skip the lookup.
+ *
+ * @param r      bitmap to modify
+ * @param val    value to insert
+ * @param type   out: type code of the container that now holds val
+ * @param index  out: position of that container in the container array
+ * @return the container that now holds val
+ */
+static inline container_t *containerptr_roaring_bitmap_add(
+    roaring_bitmap_t *r, uint32_t val,
+    uint8_t *type, int *index
+){
+    roaring_array_t *ra = &r->high_low_container;
+    const uint16_t hb = val >> 16;
+    const int pos = ra_get_index(ra, hb);
+
+    if (pos < 0) {
+        // no container for this key yet: start from an empty array container
+        // and insert it at the position encoded in the negative result
+        const int insert_at = -pos - 1;
+        array_container_t *fresh = array_container_create();
+        container_t *created = container_add(fresh, val & 0xFFFF,
+                                             ARRAY_CONTAINER_TYPE, type);
+        // we could just assume that it stays an array container
+        ra_insert_new_key_value_at(ra, insert_at, hb, created, *type);
+        *index = insert_at;
+        return created;
+    }
+
+    ra_unshare_container_at_index(ra, pos);
+    container_t *existing = ra_get_container_at_index(ra, pos, type);
+    uint8_t updated_type = *type;
+    container_t *updated =
+        container_add(existing, val & 0xFFFF, *type, &updated_type);
+    *index = pos;
+    if (updated == existing) {
+        return existing;
+    }
+
+    // the insertion produced a different container: swap it in
+    container_free(existing, *type);
+    ra_set_container_at_index(ra, pos, updated, updated_type);
+    *type = updated_type;
+    return updated;
+}
+
+/**
+ * Allocates a bitmap whose container array is initialised with capacity
+ * `cap`. Returns NULL when either the bitmap itself or its container array
+ * cannot be set up; nothing is leaked in that case.
+ */
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
+    roaring_bitmap_t *bitmap =
+        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+    if (bitmap == NULL) {
+        return NULL;
+    }
+    if (ra_init_with_capacity(&bitmap->high_low_container, cap)) {
+        return bitmap;
+    }
+    roaring_free(bitmap);
+    return NULL;
+}
+
+/**
+ * Initialises the container array of an already allocated bitmap with
+ * capacity `cap`; returns the result of ra_init_with_capacity.
+ */
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
+    roaring_array_t *ra = &r->high_low_container;
+    return ra_init_with_capacity(ra, cap);
+}
+
+/**
+ * Inserts val into r, using `context` as a one-entry cache of the container
+ * touched by the previous insertion.
+ *
+ * When the cached container exists and covers val's key, the value is added
+ * to it directly; otherwise the container is located (or created) through
+ * containerptr_roaring_bitmap_add and the context is refreshed.
+ */
+static inline void add_bulk_impl(roaring_bitmap_t *r,
+                                 roaring_bulk_context_t *context,
+                                 uint32_t val) {
+    const uint16_t key = val >> 16;
+    const bool cache_hit =
+        context->container != NULL && context->key == key;
+
+    if (!cache_hit) {
+        uint8_t found_type;
+        int found_idx;
+        context->container =
+            containerptr_roaring_bitmap_add(r, val, &found_type, &found_idx);
+        context->typecode = found_type;
+        context->idx = found_idx;
+        context->key = key;
+        return;
+    }
+
+    // no need to seek the container, it is at hand: insert directly,
+    // bypassing the roaring_bitmap_add call
+    uint8_t result_type;
+    container_t *result = container_add(context->container, val & 0xFFFF,
+                                        context->typecode, &result_type);
+    if (result == context->container) {
+        return;
+    }
+
+    // rare instance when we need to change the container type
+    container_free(context->container, context->typecode);
+    ra_set_container_at_index(&r->high_low_container, context->idx, result,
+                              result_type);
+    context->typecode = result_type;
+    context->container = result;
+}
+
+/**
+ * Adds the n_args values stored at vals to r.
+ *
+ * A bulk context caches the container touched by the previous value, so
+ * runs of values sharing the same upper 16 bits avoid repeated lookups.
+ * The context starts zeroed (no cached container), exactly as
+ * roaring_bitmap_of does, so the first value is inserted once by the loop
+ * instead of being inserted up front and then a second time.
+ *
+ * @param r       bitmap to modify
+ * @param n_args  number of values; 0 is a no-op and vals is not touched
+ * @param vals    values to add; read with memcpy so the buffer need not be
+ *                suitably aligned for uint32_t
+ */
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+                             const uint32_t *vals) {
+    if (n_args == 0) {
+        return;
+    }
+
+    roaring_bulk_context_t context;
+    memset(&context, 0, sizeof(context));
+
+    // computed only after the emptiness check so that no pointer arithmetic
+    // is performed on a possibly NULL vals
+    const uint32_t *end = vals + n_args;
+    const uint32_t *current_val;
+    for (current_val = vals; current_val != end; current_val++) {
+        uint32_t val;
+        memcpy(&val, current_val, sizeof(val));
+        add_bulk_impl(r, &context, val);
+    }
+}
+
+/**
+ * Public entry point for context-assisted insertion: adds val to r and
+ * keeps `context` pointing at the container that received it.
+ */
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+                             roaring_bulk_context_t *context, uint32_t val) {
+    add_bulk_impl(r, context, val);
+}
+
+/**
+ * Membership test that reuses `context` as a cache of the last container
+ * looked at.
+ *
+ * On a cache miss the container array is searched with ra_advance_until,
+ * starting from the cached index when the cached key is smaller than the
+ * wanted one. The context is updated to the container found, even when
+ * that container belongs to a later key (in which case false is returned).
+ */
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+                                  roaring_bulk_context_t *context,
+                                  uint32_t val)
+{
+    const roaring_array_t *ra = &r->high_low_container;
+    const uint16_t key = val >> 16;
+
+    if (context->container != NULL && context->key == key) {
+        // cached container already covers this key
+        return container_contains(context->container, val & 0xFFFF,
+                                  context->typecode);
+    }
+
+    int32_t start_idx =
+        (context->container != NULL && context->key < key) ? context->idx
+                                                           : -1;
+    const int idx = ra_advance_until(ra, key, start_idx);
+    if (idx == ra_get_size(ra)) {
+        return false;
+    }
+
+    uint8_t typecode;
+    context->container = ra_get_container_at_index(ra, idx, &typecode);
+    context->typecode = typecode;
+    context->idx = idx;
+    context->key = ra_get_key_at_index(ra, idx);
+    // ra_advance_until finds the next key >= the target, we found a later container.
+    if (context->key != key) {
+        return false;
+    }
+
+    // context is now set up
+    return container_contains(context->container, val & 0xFFFF,
+                              context->typecode);
+}
+
+/**
+ * Builds a new bitmap holding the n_args values stored at vals.
+ *
+ * @return the new bitmap, or NULL when the bitmap could not be allocated
+ *         (the values are then not read at all)
+ */
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
+    roaring_bitmap_t *answer = roaring_bitmap_create();
+    if (answer == NULL) {
+        // propagate the allocation failure instead of dereferencing NULL
+        return NULL;
+    }
+    roaring_bitmap_add_many(answer, n_args, vals);
+    return answer;
+}
+
+/**
+ * Builds a new bitmap from n_args variadic values, each read with
+ * va_arg(ap, uint32_t).
+ *
+ * @return the new bitmap, or NULL when the bitmap could not be allocated
+ */
+roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
+    // todo: could be greatly optimized but we do not expect this call to ever
+    // include long lists
+    roaring_bitmap_t *answer = roaring_bitmap_create();
+    if (answer == NULL) {
+        // propagate the allocation failure instead of dereferencing NULL;
+        // va_start has not been called yet, so there is nothing to clean up
+        return NULL;
+    }
+    roaring_bulk_context_t context;
+    va_list ap;
+
+    // a zeroed context means "no cached container yet"
+    memset(&context, 0, sizeof(context));
+    va_start(ap, n_args);
+    size_t i; for ( i = 0; i < n_args; i++) {
+        uint32_t val = va_arg(ap, uint32_t);
+        roaring_bitmap_add_bulk(answer, &context, val);
+    }
+    va_end(ap);
+    return answer;
+}
+
+/** Returns the smaller of two 32-bit unsigned values. */
+static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/** Returns the smaller of two 64-bit unsigned values. */
+static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/**
+ * Builds a bitmap holding min, min + step, min + 2*step, ... for every such
+ * value strictly below max.
+ *
+ * @param min   first value of the sequence
+ * @param max   exclusive upper bound; clamped to 2^32
+ * @param step  distance between consecutive values
+ * @return the new bitmap, or NULL when step is 0, when the (clamped) range
+ *         is empty, or when the bitmap could not be allocated
+ */
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+                                            uint32_t step) {
+    if(max >= UINT64_C(0x100000000)) {
+        max = UINT64_C(0x100000000);
+    }
+    if (step == 0) return NULL;
+    if (max <= min) return NULL;
+    roaring_bitmap_t *answer = roaring_bitmap_create();
+    if (answer == NULL) return NULL;
+    if (step >= (1 << 16)) {
+        // Large steps touch each container at most once, so add one by one.
+        // The counter is 64-bit on purpose: min < max <= 2^32 and
+        // step < 2^32, so value + step cannot overflow, whereas a 32-bit
+        // counter would wrap around near the top of the range and never
+        // reach max.
+        uint64_t value; for ( value = min; value < max; value += step) {
+            roaring_bitmap_add(answer, (uint32_t)value);
+        }
+        return answer;
+    }
+    uint64_t min_tmp = min;
+    do {
+        // one container per 16-bit key; each receives the part of the
+        // sequence that falls inside its 65536-value window
+        uint32_t key = (uint32_t)min_tmp >> 16;
+        uint32_t container_min = min_tmp & 0xFFFF;
+        uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
+        uint8_t type;
+        container_t *container = container_from_range(&type, container_min,
+                                                      container_max, (uint16_t)step);
+        ra_append(&answer->high_low_container, key, container, type);
+        // advance to the first sequence element at or after container_max
+        uint32_t gap = container_max - container_min + step - 1;
+        min_tmp += gap - (gap % step);
+    } while (min_tmp < max);
+    // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
+    return answer;
+}
+/**
+ * Adds every value of the closed interval [min, max] to r; does nothing
+ * when min > max.
+ *
+ * The interval is split by 16-bit key (value >> 16). One slot per key in
+ * [min_key, max_key] is needed; the slots are filled from the highest key
+ * down to the lowest, reusing the container that already exists for a key
+ * and building a range container for keys that have none.
+ *
+ * NOTE(review): count_greater, count_less and ra_shift_tail are defined
+ * elsewhere. The comments below assume they count the keys above/below a
+ * bound and move the trailing containers by the given distance - confirm.
+ */
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+ if (min > max) {
+ return;
+ }
+
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint32_t min_key = min >> 16;
+ uint32_t max_key = max >> 16;
+
+ int32_t num_required_containers = max_key - min_key + 1; // one container per key in [min_key, max_key]
+ int32_t suffix_length = count_greater(ra->keys, ra->size, max_key); // presumably: containers with key > max_key
+ int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length, // presumably: containers with key < min_key
+ min_key);
+ int32_t common_length = ra->size - prefix_length - suffix_length; // containers that are in neither the prefix nor the suffix
+
+ if (num_required_containers > common_length) { // fewer in-range containers than keys to cover
+ ra_shift_tail(ra, suffix_length,
+ num_required_containers - common_length);
+ }
+
+ int32_t src = prefix_length + common_length - 1; // read cursor: last container following the prefix, moving down
+ int32_t dst = ra->size - suffix_length - 1; // write cursor: last slot before the suffix, moving down
+ uint32_t key; for ( key = max_key; key != min_key-1; key--) { // beware of min_key==0: min_key-1 then wraps to UINT32_MAX, which key also reaches when decremented past 0
+ uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; // only the first key starts mid-container
+ uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; // only the last key ends mid-container
+ container_t* new_container;
+ uint8_t new_type;
+
+ if (src >= 0 && ra->keys[src] == key) { // a container already exists for this key
+ ra_unshare_container_at_index(ra, src);
+ new_container = container_add_range(ra->containers[src],
+ ra->typecodes[src],
+ container_min, container_max,
+ &new_type);
+ if (new_container != ra->containers[src]) { // a different container came back: release the old one
+ container_free(ra->containers[src],
+ ra->typecodes[src]);
+ }
+ src--;
+ } else { // no container for this key: build one covering the sub-range
+ new_container = container_from_range(&new_type, container_min,
+ container_max+1, 1); // container_max+1 with step 1: the upper bound is passed one past the last value
+ }
+ ra_replace_key_and_container_at_index(ra, dst, key, new_container,
+ new_type);
+ dst--;
+ }
+}
+
+/**
+ * Removes every value of the closed interval [min, max] from r; does
+ * nothing when min > max.
+ *
+ * Containers whose key lies in [min >> 16, max >> 16] are visited in
+ * order. Each one has the relevant sub-range removed; survivors are
+ * written back at a write cursor that trails the read cursor, and the
+ * containers after the range are shifted once at the end if any slot was
+ * vacated.
+ */
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+    if (min > max) {
+        return;
+    }
+
+    roaring_array_t *ra = &r->high_low_container;
+    const uint32_t min_key = min >> 16;
+    const uint32_t max_key = max >> 16;
+
+    int32_t src = count_less(ra->keys, ra->size, min_key);
+    int32_t dst = src;
+    for (; src < ra->size && ra->keys[src] <= max_key; src++) {
+        // only the boundary keys are trimmed partially
+        uint32_t container_min = 0;
+        uint32_t container_max = 0xffff;
+        if (min_key == ra->keys[src]) {
+            container_min = min & 0xffff;
+        }
+        if (max_key == ra->keys[src]) {
+            container_max = max & 0xffff;
+        }
+
+        ra_unshare_container_at_index(ra, src);
+        uint8_t new_type;
+        container_t *new_container = container_remove_range(
+            ra->containers[src], ra->typecodes[src],
+            container_min, container_max, &new_type);
+        if (new_container != ra->containers[src]) {
+            container_free(ra->containers[src], ra->typecodes[src]);
+        }
+        if (new_container) {
+            // container survived: store it at the write cursor
+            ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
+                                                  new_container, new_type);
+            dst++;
+        }
+    }
+    if (src > dst) {
+        // at least one slot was vacated: pull the remaining containers back
+        ra_shift_tail(ra, ra->size - src, dst - src);
+    }
+}
+
+extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+/**
+ * Prints the bitmap to stdout as "{...}", with the per-container output
+ * separated by commas.
+ */
+void roaring_bitmap_printf(const roaring_bitmap_t *r) {
+    const roaring_array_t *ra = &r->high_low_container;
+    int idx;
+
+    printf("{");
+    for (idx = 0; idx < ra->size; ++idx) {
+        if (idx > 0) {
+            printf(",");
+        }
+        // the container key supplies the upper 16 bits of every value
+        const uint32_t base = ((uint32_t)ra->keys[idx]) << 16;
+        container_printf_as_uint32_array(ra->containers[idx],
+                                         ra->typecodes[idx], base);
+    }
+    printf("}");
+}
+
+/**
+ * Prints a per-container summary to stdout: key, container name and
+ * cardinality, plus the share counter for shared containers.
+ */
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
+    const roaring_array_t *ra = &r->high_low_container;
+    int idx;
+
+    printf("{");
+    for (idx = 0; idx < ra->size; ++idx) {
+        if (idx > 0) {
+            printf(", ");
+        }
+        printf("%d: %s (%d)", ra->keys[idx],
+               get_full_container_name(ra->containers[idx], ra->typecodes[idx]),
+               container_get_cardinality(ra->containers[idx], ra->typecodes[idx]));
+        if (ra->typecodes[idx] == SHARED_CONTAINER_TYPE) {
+            printf(
+                "(shared count = %" PRIu32 " )",
+                CAST_shared(ra->containers[idx])->counter);
+        }
+    }
+    printf("}");
+}
+#endif
+
+/** Running minimum, maximum and sum collected by min_max_sum_fnc. */
+typedef struct min_max_sum_s {
+    uint32_t min;
+    uint32_t max;
+    uint64_t sum;
+} min_max_sum_t;
+
+/**
+ * Iteration callback: folds `value` into the min_max_sum_t passed through
+ * `param`. Always returns true so that every value is visited.
+ */
+static bool min_max_sum_fnc(uint32_t value, void *param) {
+    min_max_sum_t *acc = (min_max_sum_t *)param;
+
+    acc->sum += value;
+    if (value < acc->min) {
+        acc->min = value;
+    }
+    if (value > acc->max) {
+        acc->max = value;
+    }
+    return true;  // we always process all data points
+}
+
+/**
+ * (For advanced users.)
+ * Collect statistics about the bitmap.
+ *
+ * `stat` is zeroed first, then filled with the container count, the
+ * cardinality, the min/max/sum of the stored values and, per container
+ * kind, the number of containers, values and bytes. For an empty bitmap
+ * min_value stays at 0xFFFFFFFF and max_value at 0.
+ */
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+                               roaring_statistics_t *stat) {
+    const roaring_array_t *ra = &r->high_low_container;
+
+    memset(stat, 0, sizeof(*stat));
+    stat->n_containers = ra->size;
+    stat->cardinality = roaring_bitmap_get_cardinality(r);
+
+    // one pass over all values for min / max / sum
+    min_max_sum_t totals;
+    totals.min = UINT32_C(0xFFFFFFFF);
+    totals.max = UINT32_C(0);
+    totals.sum = 0;
+    roaring_iterate(r, &min_max_sum_fnc, &totals);
+    stat->min_value = totals.min;
+    stat->max_value = totals.max;
+    stat->sum_value = totals.sum;
+
+    // one pass over the containers for the per-kind breakdown
+    int idx;
+    for (idx = 0; idx < ra->size; ++idx) {
+        uint8_t truetype =
+            get_container_type(ra->containers[idx], ra->typecodes[idx]);
+        uint32_t card =
+            container_get_cardinality(ra->containers[idx], ra->typecodes[idx]);
+        uint32_t sbytes =
+            container_size_in_bytes(ra->containers[idx], ra->typecodes[idx]);
+
+        if (truetype == BITSET_CONTAINER_TYPE) {
+            stat->n_bitset_containers++;
+            stat->n_values_bitset_containers += card;
+            stat->n_bytes_bitset_containers += sbytes;
+        } else if (truetype == ARRAY_CONTAINER_TYPE) {
+            stat->n_array_containers++;
+            stat->n_values_array_containers += card;
+            stat->n_bytes_array_containers += sbytes;
+        } else if (truetype == RUN_CONTAINER_TYPE) {
+            stat->n_run_containers++;
+            stat->n_values_run_containers += card;
+            stat->n_bytes_run_containers += sbytes;
+        } else {
+            assert(false);
+            __builtin_unreachable();
+        }
+    }
+}
+
+/**
+ * Returns a newly allocated copy of r carrying the same copy-on-write
+ * setting, or NULL when any of the allocations involved fails.
+ */
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
+    roaring_bitmap_t *clone =
+        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+    if (clone == NULL) {
+        return NULL;
+    }
+
+    // allocation of list of containers can fail
+    const bool array_ready = ra_init_with_capacity(
+        &clone->high_low_container, r->high_low_container.size);
+    if (!array_ready) {
+        roaring_free(clone);
+        return NULL;
+    }
+
+    // memory allocation of individual containers may fail
+    const bool copied = ra_overwrite(
+        &r->high_low_container, &clone->high_low_container, is_cow(r));
+    if (!copied) {
+        roaring_bitmap_free(clone);  // overwrite should leave in freeable state
+        return NULL;
+    }
+
+    roaring_bitmap_set_copy_on_write(clone, is_cow(r));
+    return clone;
+}
+
+/**
+ * Makes dest a copy of src: dest first takes over src's copy-on-write
+ * setting, then its containers are overwritten through ra_overwrite, whose
+ * result is returned.
+ */
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+                              const roaring_bitmap_t *src) {
+    const roaring_array_t *from = &src->high_low_container;
+    roaring_array_t *to = &dest->high_low_container;
+
+    roaring_bitmap_set_copy_on_write(dest, is_cow(src));
+    return ra_overwrite(from, to, is_cow(src));
+}
+
+/**
+ * Releases a bitmap.
+ *
+ * Passing NULL is a no-op, mirroring free(): several constructors in this
+ * file (roaring_bitmap_copy, roaring_bitmap_create_with_capacity) return
+ * NULL on allocation failure, and callers should be able to hand that
+ * result straight back without a crash.
+ *
+ * For a bitmap flagged as frozen the container array is not cleared; only
+ * the bitmap object itself is returned to the allocator.
+ */
+void roaring_bitmap_free(const roaring_bitmap_t *r) {
+    if (r == NULL) {
+        return;
+    }
+    if (!is_frozen(r)) {
+        ra_clear((roaring_array_t*)&r->high_low_container);
+    }
+    roaring_free((roaring_bitmap_t*)r);
+}
+
+/** Resets the bitmap's container array through ra_reset. */
+void roaring_bitmap_clear(roaring_bitmap_t *r) {
+    roaring_array_t *ra = &r->high_low_container;
+    ra_reset(ra);
+}
+
+/**
+ * Adds val to r.
+ *
+ * The upper 16 bits select the container; when none exists for that key a
+ * new array container is created and inserted at the position reported by
+ * ra_get_index. Otherwise the existing container is unshared and updated,
+ * and replaced if the insertion returned a different container.
+ */
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
+    roaring_array_t *ra = &r->high_low_container;
+    const uint16_t hb = val >> 16;
+    const int pos = ra_get_index(ra, hb);
+    uint8_t typecode;
+
+    if (pos < 0) {
+        array_container_t *fresh = array_container_create();
+        container_t *created = container_add(fresh, val & 0xFFFF,
+                                             ARRAY_CONTAINER_TYPE, &typecode);
+        // we could just assume that it stays an array container
+        ra_insert_new_key_value_at(ra, -pos - 1, hb, created, typecode);
+        return;
+    }
+
+    ra_unshare_container_at_index(ra, pos);
+    container_t *before = ra_get_container_at_index(ra, pos, &typecode);
+    uint8_t new_typecode = typecode;
+    container_t *after =
+        container_add(before, val & 0xFFFF, typecode, &new_typecode);
+    if (after == before) {
+        return;
+    }
+    container_free(before, typecode);
+    ra_set_container_at_index(ra, pos, after, new_typecode);
+}
+
+/**
+ * Adds val to r and reports whether the bitmap changed.
+ *
+ * @return true when a new container was created, when the insertion
+ *         returned a different container, or when the cardinality of the
+ *         (unchanged) container differs before and after; false otherwise
+ */
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
+    roaring_array_t *ra = &r->high_low_container;
+    const uint16_t hb = val >> 16;
+    const int pos = ra_get_index(ra, hb);
+    uint8_t typecode;
+
+    if (pos < 0) {
+        array_container_t *fresh = array_container_create();
+        container_t *created = container_add(fresh, val & 0xFFFF,
+                                             ARRAY_CONTAINER_TYPE, &typecode);
+        // we could just assume that it stays an array container
+        ra_insert_new_key_value_at(ra, -pos - 1, hb, created, typecode);
+        return true;
+    }
+
+    ra_unshare_container_at_index(ra, pos);
+    container_t *before = ra_get_container_at_index(ra, pos, &typecode);
+    const int card_before = container_get_cardinality(before, typecode);
+
+    uint8_t new_typecode = typecode;
+    container_t *after =
+        container_add(before, val & 0xFFFF, typecode, &new_typecode);
+    if (after != before) {
+        container_free(before, typecode);
+        ra_set_container_at_index(ra, pos, after, new_typecode);
+        return true;
+    }
+
+    const int card_after = container_get_cardinality(before, new_typecode);
+    return card_before != card_after;
+}
+
+/**
+ * Removes val from r. Nothing happens when no container exists for val's
+ * key; a container that ends up with cardinality 0 is removed and freed.
+ */
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
+    roaring_array_t *ra = &r->high_low_container;
+    const uint16_t hb = val >> 16;
+    const int pos = ra_get_index(ra, hb);
+    if (pos < 0) {
+        return;
+    }
+
+    uint8_t typecode;
+    ra_unshare_container_at_index(ra, pos);
+    container_t *before = ra_get_container_at_index(ra, pos, &typecode);
+    uint8_t new_typecode = typecode;
+    container_t *after =
+        container_remove(before, val & 0xFFFF, typecode, &new_typecode);
+    if (after != before) {
+        container_free(before, typecode);
+        ra_set_container_at_index(ra, pos, after, new_typecode);
+    }
+
+    if (container_get_cardinality(after, new_typecode) == 0) {
+        ra_remove_at_index_and_free(ra, pos);
+    } else {
+        ra_set_container_at_index(ra, pos, after, new_typecode);
+    }
+}
+
+/**
+ * Removes val from r and reports whether the bitmap changed.
+ *
+ * @return true when the cardinality of the affected container differs
+ *         before and after the removal; false otherwise, including when no
+ *         container exists for val's key
+ */
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
+    roaring_array_t *ra = &r->high_low_container;
+    const uint16_t hb = val >> 16;
+    const int pos = ra_get_index(ra, hb);
+    if (pos < 0) {
+        return false;
+    }
+
+    uint8_t typecode;
+    ra_unshare_container_at_index(ra, pos);
+    container_t *before = ra_get_container_at_index(ra, pos, &typecode);
+    const int card_before = container_get_cardinality(before, typecode);
+
+    uint8_t new_typecode = typecode;
+    container_t *after =
+        container_remove(before, val & 0xFFFF, typecode, &new_typecode);
+    if (after != before) {
+        container_free(before, typecode);
+        ra_set_container_at_index(ra, pos, after, new_typecode);
+    }
+
+    const int card_after = container_get_cardinality(after, new_typecode);
+    if (card_after == 0) {
+        // the container became empty: drop it from the array
+        ra_remove_at_index_and_free(ra, pos);
+    } else {
+        ra_set_container_at_index(ra, pos, after, new_typecode);
+    }
+
+    return card_before != card_after;
+}
+/**
+ * Removes the n_args values stored at vals from r.
+ *
+ * The index of the container used for the previous value is kept in `pos`,
+ * so consecutive values with the same upper 16 bits skip the key lookup.
+ * A container that becomes empty is freed and removed from the array.
+ *
+ * NOTE(review): unlike roaring_bitmap_remove, this function does not call
+ * ra_unshare_container_at_index before container_remove - confirm that
+ * container_remove copes with shared containers on its own.
+ */
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals) {
+ if (n_args == 0 || r->high_low_container.size == 0) { // nothing to remove, or nothing to remove from
+ return;
+ }
+ int32_t pos = -1; // position of the container used in the previous iteration
+ size_t i; for ( i = 0; i < n_args; i++) {
+ uint16_t key = (uint16_t)(vals[i] >> 16);
+ if (pos < 0 || key != r->high_low_container.keys[pos]) { // cached position is unusable for this key: look it up
+ pos = ra_get_index(&r->high_low_container, key);
+ }
+ if (pos >= 0) { // a container exists for this key
+ uint8_t new_typecode;
+ container_t *new_container;
+ new_container = container_remove(r->high_low_container.containers[pos],
+ vals[i] & 0xffff,
+ r->high_low_container.typecodes[pos],
+ &new_typecode);
+ if (new_container != r->high_low_container.containers[pos]) { // a different container came back: release the old one and store the new one
+ container_free(r->high_low_container.containers[pos],
+ r->high_low_container.typecodes[pos]);
+ ra_replace_key_and_container_at_index(&r->high_low_container,
+ pos, key, new_container,
+ new_typecode);
+ }
+ if (!container_nonzero_cardinality(new_container, new_typecode)) { // container is now empty
+ container_free(new_container, new_typecode);
+ ra_remove_at_index(&r->high_low_container, pos);
+ pos = -1; // the cached position no longer refers to this key: force a fresh lookup
+ }
+ }
+ }
+}
+
+// there should be some SIMD optimizations possible here
+/**
+ * Returns a newly allocated bitmap holding the intersection of x1 and x2.
+ *
+ * Both key arrays are walked together. Containers are intersected only
+ * when their keys match, and empty results are freed rather than stored.
+ * The result is copy-on-write when either input is.
+ */
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
+                                     const roaring_bitmap_t *x2) {
+    const roaring_array_t *ra1 = &x1->high_low_container;
+    const roaring_array_t *ra2 = &x2->high_low_container;
+    uint8_t result_type = 0;
+    const int length1 = ra1->size;
+    const int length2 = ra2->size;
+
+    // the result is given capacity for as many containers as the smaller input has
+    uint32_t neededcap = length1 > length2 ? length2 : length1;
+    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
+    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+    int pos1 = 0;
+    int pos2 = 0;
+    while (pos1 < length1 && pos2 < length2) {
+        const uint16_t s1 = ra_get_key_at_index(ra1, pos1);
+        const uint16_t s2 = ra_get_key_at_index(ra2, pos2);
+
+        if (s1 < s2) {
+            pos1 = ra_advance_until(ra1, s2, pos1);
+            continue;
+        }
+        if (s1 > s2) {
+            pos2 = ra_advance_until(ra2, s1, pos2);
+            continue;
+        }
+
+        // same key on both sides: intersect the two containers
+        uint8_t type1, type2;
+        container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1);
+        container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2);
+        container_t *c = container_and(c1, type1, c2, type2, &result_type);
+
+        if (container_nonzero_cardinality(c, result_type)) {
+            ra_append(&answer->high_low_container, s1, c, result_type);
+        } else {
+            container_free(c, result_type);  // otherwise: memory leak!
+        }
+        ++pos1;
+        ++pos2;
+    }
+    return answer;
+}
+
+/**
+ * Compute the union of 'number' bitmaps.
+ *
+ * Zero inputs give a new empty bitmap and a single input gives a copy of
+ * it. Otherwise the inputs are accumulated with the lazy OR operations and
+ * the accumulator is repaired once at the end.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+                                         const roaring_bitmap_t **x) {
+    if (number == 0) {
+        return roaring_bitmap_create();
+    }
+    if (number == 1) {
+        return roaring_bitmap_copy(x[0]);
+    }
+
+    roaring_bitmap_t *acc =
+        roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
+    size_t next = 2;
+    while (next < number) {
+        roaring_bitmap_lazy_or_inplace(acc, x[next], LAZY_OR_BITSET_CONVERSION);
+        next++;
+    }
+    roaring_bitmap_repair_after_lazy(acc);
+    return acc;
+}
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ *
+ * Zero inputs give a new empty bitmap and a single input gives a copy of
+ * it. Otherwise the inputs are accumulated with the lazy XOR operations
+ * and the accumulator is repaired once at the end.
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+                                          const roaring_bitmap_t **x) {
+    if (number == 0) {
+        return roaring_bitmap_create();
+    }
+    if (number == 1) {
+        return roaring_bitmap_copy(x[0]);
+    }
+
+    roaring_bitmap_t *acc = roaring_bitmap_lazy_xor(x[0], x[1]);
+    size_t next = 2;
+    while (next < number) {
+        roaring_bitmap_lazy_xor_inplace(acc, x[next]);
+        next++;
+    }
+    roaring_bitmap_repair_after_lazy(acc);
+    return acc;
+}
+
+/**
+ * In-place intersection: x1 becomes x1 AND x2 (modifies its first argument).
+ *
+ * Both key arrays are walked together. Surviving containers are compacted
+ * towards the front of x1 (write cursor `intersection_size`); containers of
+ * x1 that are skipped over or become empty are freed, and the array is
+ * finally downsized to the number of survivors. Returns immediately when
+ * x1 and x2 are the same object.
+ */
+void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ if (x1 == x2) return; // intersecting a bitmap with itself changes nothing
+ int pos1 = 0, pos2 = 0, intersection_size = 0; // read cursors into x1 / x2, write cursor into x1
+ const int length1 = ra_get_size(&x1->high_low_container);
+ const int length2 = ra_get_size(&x2->high_low_container);
+
+ // any skipped-over or newly emptied containers in x1
+ // have to be freed.
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) { // key present on both sides: intersect the containers
+ uint8_t type1, type2, result_type;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_and(c1, type1, c2, type2, &result_type)
+ : container_iand(c1, type1, c2, type2, &result_type);
+
+ if (c != c1) { // in this instance a new container was created, and
+ // we need to free the old one
+ container_free(c1, type1);
+ }
+ if (container_nonzero_cardinality(c, result_type)) { // keep non-empty results, packed at the write cursor
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c,
+ result_type);
+ intersection_size++;
+ } else { // empty intersection for this key: drop the container
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // x1 has keys that x2 lacks: skip them, using the freeing variant on x1
+ pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+
+ // if we ended early because x2 ran out, then all remaining in x1 should be
+ // freed
+ while (pos1 < length1) {
+ container_free(x1->high_low_container.containers[pos1],
+ x1->high_low_container.typecodes[pos1]);
+ ++pos1;
+ }
+
+ // all containers after this have either been copied or freed
+ ra_downsize(&x1->high_low_container, intersection_size);
+}
+/**
+ * Returns a newly allocated bitmap holding the union of x1 and x2.
+ *
+ * When one input has no containers, a copy of the other is returned.
+ * Otherwise both key arrays are walked together: containers with the same
+ * key are OR-ed, containers present on one side only are copied, and
+ * whatever remains of the longer input is appended at the end. The result
+ * is copy-on-write when either input is.
+ *
+ * Note that although x1 and x2 are const, a copy-on-write input has the
+ * container returned by get_copy_of_container stored back into it.
+ */
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) { // x1 is empty: the union is x2
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) { // x2 is empty: the union is x1
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2); // capacity for the case where no key is shared
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) { // left via break as soon as either input is exhausted
+ if (s1 == s2) { // key present on both sides: OR the containers
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_or(c1, type1, c2, type2, &result_type);
+
+ // since we assume that the initial containers are non-empty, the
+ // result here
+ // can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ // c1 = container_clone(c1, type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) { // copy-on-write: x1 keeps the container/type that get_copy_of_container handed back
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // c2 = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) { // copy-on-write: x2 keeps the container/type that get_copy_of_container handed back
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) { // x1 exhausted: append what is left of x2
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) { // x2 exhausted: append what is left of x1
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+/**
+ * In-place union: x1 becomes x1 OR x2 (modifies its first argument).
+ *
+ * An empty x2 leaves x1 untouched; an empty x1 is overwritten with x2.
+ * Otherwise both key arrays are walked together: matching containers are
+ * OR-ed into x1, containers that only x2 has are copied and inserted into
+ * x1 at the current position, and any remainder of x2 is appended.
+ *
+ * NOTE(review): there is no x1 == x2 guard here, unlike
+ * roaring_bitmap_and_inplace (early return) and roaring_bitmap_xor_inplace
+ * (assert) - confirm that aliasing is safe for this operation.
+ */
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size; // not const: grows when containers are inserted into x1
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) { // left via break as soon as either input is exhausted
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ if (!container_is_full(c1, type1)) { // a full container cannot gain anything from the OR
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_or(c1, type1, c2, type2, &result_type)
+ : container_ior(c1, type1, c2, type2, &result_type); // in-place variant only when c1 is not shared
+
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++; // key only in x1: already part of the result
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) { // copy-on-write: x2 keeps the container/type that get_copy_of_container handed back
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+
+ // container_t *c2_clone = container_clone(c2, type2);
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++; // step past the container just inserted; s1 still names the same x1 key
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) { // x1 exhausted: append what is left of x2
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+/**
+ * Returns a newly allocated bitmap holding the symmetric difference of x1
+ * and x2.
+ *
+ * When one input has no containers, a copy of the other is returned.
+ * Otherwise both key arrays are walked together: containers with the same
+ * key are XOR-ed (empty results are freed, not stored), containers present
+ * on one side only are copied, and whatever remains of the longer input is
+ * appended at the end. The result is copy-on-write when either input is.
+ *
+ * Note that although x1 and x2 are const, a copy-on-write input has the
+ * container returned by get_copy_of_container stored back into it.
+ */
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) { // x1 is empty: the result is x2
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) { // x2 is empty: the result is x1
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2); // capacity for the case where no key is shared
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) { // left via break as soon as either input is exhausted
+ if (s1 == s2) { // key present on both sides: XOR the containers
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_xor(c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else { // identical containers cancel out: nothing to store for this key
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) { // copy-on-write: x1 keeps the container/type that get_copy_of_container handed back
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) { // copy-on-write: x2 keeps the container/type that get_copy_of_container handed back
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) { // x1 exhausted: append what is left of x2
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) { // x2 exhausted: append what is left of x1
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+ // inplace xor (modifies its first argument): x1 becomes x1 XOR x2. Walks the keys of both bitmaps in lockstep; x1 and x2 must be distinct objects (asserted).
+
+ void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size; // local count, adjusted below whenever a container is removed from or inserted into x1
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return; // x2 has no containers: x1 is left untouched
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2); // x1 has no containers: the result is simply x2's content
+ return;
+ }
+
+ // XOR can have new containers inserted from x2, but can also
+ // lose containers when x1 and x2 are nonempty and identical.
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) { // same key on both sides: combine the two containers
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // so release
+ }
+ else {
+ c = container_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1; // result kept in slot pos1: step past it
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1; // slot removed: pos1 is not advanced, only the local count shrinks
+ }
+
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2: key only present in x1, its container stays as it is
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2: key only present in x2, its container is inserted into x1
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2)); // NOTE(review): presumably clones c2, or wraps it as shared when x2 is copy-on-write - confirm in get_copy_of_container
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2); // under copy-on-write the returned pointer/type is stored back into x2 as well
+ }
+
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++; // step past the slot that was just inserted
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) { // x1 exhausted: append the range [pos2, length2) of x2 (empty when x2 is exhausted too)
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+ }
+
+ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) { // Builds and returns a new bitmap holding the difference x1 AND NOT x2, walking the keys of both inputs in lockstep.
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) { // x1 has no containers: the result is an empty bitmap that only inherits the copy-on-write setting
+ roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2));
+ return empty_bitmap;
+ }
+ if (0 == length2) { // nothing to subtract
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); // NOTE(review): no NULL check on the allocation result - confirm whether callers rely on that
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = 0;
+ uint16_t s2 = 0;
+ while (true) {
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1); // both keys are re-read at the top of every iteration
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) { // same key on both sides: subtract container from container
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_andnot(c1, type1, c2, type2,
+ &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type); // empty difference: nothing is appended for this key
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ } else if (s1 < s2) { // s1 < s2: a run of x1 keys has no counterpart in x2 and is copied over
+ const int next_pos1 =
+ ra_advance_until(&x1->high_low_container, s2, pos1); // NOTE(review): presumably the first index after pos1 whose key is >= s2 - confirm in ra_advance_until
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, next_pos1,
+ is_cow(x1));
+ // TODO : perhaps some of the copy_on_write should be based on
+ // answer rather than x1 (more stringent?). Many similar cases
+ pos1 = next_pos1;
+ if (pos1 == length1) break;
+ } else { // s1 > s2: skip the x2 keys that cannot affect the result
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ if (pos2 == length2) break;
+ }
+ }
+ if (pos2 == length2) { // x2 exhausted: the range [pos1, length1) of x1 is copied unchanged (empty when x1 is exhausted too)
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+ }
+
+ // inplace andnot (modifies its first argument): x1 becomes x1 AND NOT x2. Surviving containers are compacted towards the front of x1 and the array is downsized at the end; x1 and x2 must be distinct objects (asserted).
+
+ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+ int intersection_size = 0; // write cursor: number of containers kept so far at the front of x1 (also the final size passed to ra_downsize)
+
+ if (0 == length2) return; // nothing to subtract
+
+ if (0 == length1) {
+ roaring_bitmap_clear(x1);
+ return;
+ }
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) { // same key on both sides: subtract container from container
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_andnot(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_iandnot(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size++, s1,
+ c, result_type); // keep: store at the write cursor, then advance the cursor
+ } else {
+ container_free(c, result_type); // empty difference: the key disappears from x1
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2: key only present in x1, its container survives unchanged
+ if (pos1 != intersection_size) { // a gap has opened: move the container down to the write cursor
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c1,
+ type1);
+ }
+ intersection_size++;
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2: skip the x2 keys that cannot affect x1
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); // NOTE(review): presumably the first index after pos2 whose key is >= s1 - confirm in ra_advance_until
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+
+ if (pos1 < length1) { // x2 ran out first: everything left in x1 survives
+ // all containers between intersection_size and
+ // pos1 are junk. However, they have either been moved
+ // (thus still referenced) or involved in an iandnot
+ // that will clean up all containers that could not be reused.
+ // Thus we should not free the junk containers between
+ // intersection_size and pos1.
+ if (pos1 > intersection_size) {
+ // left slide of remaining items
+ ra_copy_range(&x1->high_low_container, pos1, length1,
+ intersection_size);
+ }
+ // else current placement is fine
+ intersection_size += (length1 - pos1);
+ }
+ ra_downsize(&x1->high_low_container, intersection_size); // drop the slots beyond the kept containers
+ }
+
+ /**
+  * Number of values stored in the bitmap, obtained by summing the
+  * cardinality of each container.
+  */
+ uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
+     const roaring_array_t *ra = &r->high_low_container;
+     uint64_t total = 0;
+     int idx = 0;
+
+     while (idx < ra->size) {
+         total += container_get_cardinality(ra->containers[idx],
+                                            ra->typecodes[idx]);
+         ++idx;
+     }
+     return total;
+ }
+
+ /**
+  * Count the values of r that fall in the half-open interval
+  * [range_start, range_end). range_end is clamped to 2^32.
+  */
+ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+                                           uint64_t range_start,
+                                           uint64_t range_end) {
+     const roaring_array_t *ra = &r->high_low_container;
+
+     if (range_end > UINT32_MAX) {
+         range_end = UINT32_MAX + UINT64_C(1);
+     }
+     if (range_end <= range_start) {
+         return 0;
+     }
+     // Work with an inclusive upper bound from here on:
+     // 0 <= range_start <= last <= UINT32_MAX
+     const uint64_t last = range_end - 1;
+     const uint16_t first_key = range_start >> 16;
+     const uint16_t last_key = last >> 16;
+
+     uint64_t total = 0;
+     int idx = ra_get_index(ra, first_key);
+
+     if (idx < 0) {
+         // no container for first_key: decode the starting index from the
+         // negative result
+         idx = -idx - 1;
+     } else {
+         // first container: count up to `last` when the whole range lives
+         // here, otherwise count everything ...
+         if (first_key == last_key) {
+             total += container_rank(ra->containers[idx], ra->typecodes[idx],
+                                     last & 0xffff);
+         } else {
+             total += container_get_cardinality(ra->containers[idx],
+                                                ra->typecodes[idx]);
+         }
+         // ... then discount the values strictly below range_start
+         if ((range_start & 0xffff) != 0) {
+             total -= container_rank(ra->containers[idx], ra->typecodes[idx],
+                                     (range_start & 0xffff) - 1);
+         }
+         idx++;
+     }
+
+     // remaining containers, up to and including the one holding `last`
+     while (idx < ra->size && ra->keys[idx] <= last_key) {
+         if (ra->keys[idx] == last_key) {
+             total += container_rank(ra->containers[idx], ra->typecodes[idx],
+                                     last & 0xffff);
+             break;
+         }
+         total += container_get_cardinality(ra->containers[idx],
+                                            ra->typecodes[idx]);
+         idx++;
+     }
+
+     return total;
+ }
+
+
+ /** True when the bitmap holds no container at all. */
+ bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
+     const roaring_array_t *ra = &r->high_low_container;
+     return 0 == ra->size;
+ }
+
+ /** Write the bitmap's values into `ans` by delegating to ra_to_uint32_array. */
+ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
+     const roaring_array_t *ra = &r->high_low_container;
+     ra_to_uint32_array(ra, ans);
+ }
+
+ /**
+  * Paged variant of roaring_bitmap_to_uint32_array: forwards `offset`,
+  * `limit` and `ans` to ra_range_uint32_array and returns its result.
+  */
+ bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+                                        size_t offset, size_t limit,
+                                        uint32_t *ans) {
+     const roaring_array_t *ra = &r->high_low_container;
+     const bool ok = ra_range_uint32_array(ra, offset, limit, ans);
+     return ok;
+ }
+
+ /**
+  * Pass every container through convert_run_optimize, which picks the
+  * representation (run, array or bitset) for it. Each container is unshared
+  * first. Returns true if at least one container ends up as a run container.
+  */
+ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
+     roaring_array_t *ra = &r->high_low_container;
+     bool has_run = false;
+     int idx;
+
+     for (idx = 0; idx < ra->size; idx++) {
+         uint8_t type_before, type_after;
+
+         // TODO: this introduces extra cloning!
+         ra_unshare_container_at_index(ra, idx);
+
+         container_t *current =
+             ra_get_container_at_index(ra, idx, &type_before);
+         container_t *optimized =
+             convert_run_optimize(current, type_before, &type_after);
+
+         has_run = has_run || (type_after == RUN_CONTAINER_TYPE);
+         ra_set_container_at_index(ra, idx, optimized, type_after);
+     }
+     return has_run;
+ }
+
+ /**
+  * Shrink every container and then the container array itself.
+  * Returns the sum of the values reported by container_shrink_to_fit and
+  * ra_shrink_to_fit.
+  */
+ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
+     roaring_array_t *ra = &r->high_low_container;
+     size_t saved = 0;
+     int idx = 0;
+
+     while (idx < ra->size) {
+         uint8_t type;
+         container_t *c = ra_get_container_at_index(ra, idx, &type);
+         saved += container_shrink_to_fit(c, type);
+         ++idx;
+     }
+     return saved + ra_shrink_to_fit(ra);
+ }
+
+/**
+ * Remove run-length encoding even when it is more space efficient
+ * return whether a change was applied
+ */
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
+ bool answer = false;
+ int i; for (i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original, type_after;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
+ answer = true;
+ if (type_original == SHARED_CONTAINER_TYPE) {
+ run_container_t *truec = CAST_run(CAST_shared(c)->container);
+ int32_t card = run_container_cardinality(truec);
+ container_t *c1 = convert_to_bitset_or_array_container(
+ truec, card, &type_after);
+ shared_container_free(CAST_shared(c)); // frees run as needed
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+
+ } else {
+ int32_t card = run_container_cardinality(CAST_run(c));
+ container_t *c1 = convert_to_bitset_or_array_container(
+ CAST_run(c), card, &type_after);
+ run_container_free(CAST_run(c));
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+ }
+ }
+ }
+ return answer;
+}
+
+ /**
+  * Serialize r into buf using whichever encoding is smaller:
+  *  - CROARING_SERIALIZATION_CONTAINER: tag byte followed by the portable
+  *    format, or
+  *  - CROARING_SERIALIZATION_ARRAY_UINT32: tag byte, a 32-bit element count,
+  *    then the values as uint32_t.
+  * buf must be able to hold roaring_bitmap_size_in_bytes(r) bytes.
+  * Returns the number of bytes written.
+  */
+ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
+     size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
+     uint64_t cardinality = roaring_bitmap_get_cardinality(r);
+     uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
+
+     if (portablesize < sizeasarray) {
+         buf[0] = CROARING_SERIALIZATION_CONTAINER;
+         return roaring_bitmap_portable_serialize(r, buf + 1) + 1;
+     }
+
+     buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32;
+     // Narrow to 32 bits *before* copying: memcpy'ing the first four bytes of
+     // the 64-bit counter stores the high-order (zero) half on big-endian
+     // hosts, while roaring_bitmap_deserialize reads a uint32_t back.
+     // NOTE(review): assumes the count fits in 32 bits whenever the array
+     // encoding is selected - confirm against the portable size bound.
+     const uint32_t card32 = (uint32_t)cardinality;
+     memcpy(buf + 1, &card32, sizeof(card32));
+     roaring_bitmap_to_uint32_array(
+         r, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
+     return 1 + (size_t)sizeasarray;
+ }
+
+ /**
+  * Number of bytes roaring_bitmap_serialize would write: one tag byte plus
+  * the smaller of the portable encoding and the plain uint32 array encoding.
+  */
+ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
+     const size_t portable = roaring_bitmap_portable_size_in_bytes(r);
+     const uint64_t as_array =
+         roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) +
+         sizeof(uint32_t);
+
+     if (portable < as_array) {
+         return portable + 1;
+     }
+     return (size_t)as_array + 1;
+ }
+
+ /** Size of the portable encoding, as reported by ra_portable_size_in_bytes. */
+ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
+     const roaring_array_t *ra = &r->high_low_container;
+     return ra_portable_size_in_bytes(ra);
+ }
+
+
+ /**
+  * Deserialize a bitmap from the portable format, reading at most maxbytes
+  * bytes from buf.
+  * Returns a newly allocated bitmap (copy-on-write disabled), or NULL when
+  * the allocation fails or ra_portable_deserialize rejects the input.
+  */
+ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
+     roaring_bitmap_t *ans =
+         (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+     if (ans == NULL) {
+         return NULL;
+     }
+
+     size_t bytesread = 0;
+     if (!ra_portable_deserialize(&ans->high_low_container, buf, maxbytes,
+                                  &bytesread)) {
+         // Bail out before touching the array: on failure its fields may
+         // never have been initialised.
+         roaring_free(ans);
+         return NULL;
+     }
+
+     assert(bytesread <= maxbytes);
+     (void)bytesread;  // only read by the assert; avoids an unused warning under NDEBUG
+
+     roaring_bitmap_set_copy_on_write(ans, false);
+     return ans;
+ }
+
+ /**
+  * Unbounded variant of roaring_bitmap_portable_deserialize_safe: the byte
+  * limit passed along is SIZE_MAX, so the caller must trust buf.
+  */
+ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
+     const size_t no_limit = SIZE_MAX;
+     return roaring_bitmap_portable_deserialize_safe(buf, no_limit);
+ }
+
+
+ /** Forwards to ra_portable_deserialize_size and returns its result. */
+ size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
+     const size_t reported = ra_portable_deserialize_size(buf, maxbytes);
+     return reported;
+ }
+
+
+ /**
+  * Write r to buf in the portable format by delegating to
+  * ra_portable_serialize; returns that function's result.
+  */
+ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
+                                          char *buf) {
+     const roaring_array_t *ra = &r->high_low_container;
+     return ra_portable_serialize(ra, buf);
+ }
+
+ /**
+  * Inverse of roaring_bitmap_serialize: the first byte of buf selects the
+  * encoding. Returns a new bitmap, or NULL when the tag is unknown or the
+  * bitmap cannot be created.
+  * The buffer length is not known here, so buf must come from a trusted
+  * source.
+  */
+ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
+     const char *bytes = (const char *)buf;
+     const char tag = bytes[0];
+
+     if (tag != CROARING_SERIALIZATION_ARRAY_UINT32) {
+         if (tag == CROARING_SERIALIZATION_CONTAINER) {
+             return roaring_bitmap_portable_deserialize(bytes + 1);
+         }
+         return (NULL);
+     }
+
+     /* This looks like a compressed set of uint32_t elements */
+     uint32_t count;
+     memcpy(&count, bytes + 1, sizeof(uint32_t));
+     const char *payload = bytes + 1 + sizeof(uint32_t);
+
+     roaring_bitmap_t *bitmap = roaring_bitmap_create();
+     if (bitmap == NULL) {
+         return NULL;
+     }
+
+     roaring_bulk_context_t context;
+     memset(&context, 0, sizeof(context));
+
+     uint32_t k;
+     for (k = 0; k < count; k++) {
+         // the payload may not be aligned, read with memcpy
+         uint32_t value;
+         memcpy(&value, payload + (size_t)k * sizeof(value), sizeof(value));
+         roaring_bitmap_add_bulk(bitmap, &context, value);
+     }
+     return bitmap;
+ }
+
+ /**
+  * Hand every container to container_iterate together with `iterator` and
+  * `ptr`. Stops and returns false as soon as container_iterate returns
+  * false; returns true otherwise.
+  */
+ bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+                      void *ptr) {
+     const roaring_array_t *ra = &r->high_low_container;
+     int idx = 0;
+
+     while (idx < ra->size) {
+         // the key supplies the upper 16 bits of every value in the container
+         const uint32_t base = ((uint32_t)ra->keys[idx]) << 16;
+         const bool keep_going = container_iterate(
+             ra->containers[idx], ra->typecodes[idx], base, iterator, ptr);
+         if (!keep_going) {
+             return false;
+         }
+         ++idx;
+     }
+     return true;
+ }
+
+ /**
+  * 64-bit flavour of roaring_iterate: identical walk over the containers,
+  * with `high_bits` passed through to container_iterate64. Stops and
+  * returns false as soon as container_iterate64 returns false.
+  */
+ bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+                        uint64_t high_bits, void *ptr) {
+     const roaring_array_t *ra = &r->high_low_container;
+     int idx = 0;
+
+     while (idx < ra->size) {
+         const uint32_t base = ((uint32_t)ra->keys[idx]) << 16;
+         const bool keep_going =
+             container_iterate64(ra->containers[idx], ra->typecodes[idx],
+                                 base, iterator, high_bits, ptr);
+         if (!keep_going) {
+             return false;
+         }
+         ++idx;
+     }
+     return true;
+ }
+
+/****
+* begin roaring_uint32_iterator_t
+*****/
+
+ // Prepares the iterator for the container designated by
+ // newit->container_index: resets the in-container cursors and caches the
+ // container pointer, its typecode and its high bits. When the index is out
+ // of range the iterator is marked as having no value (current_value is set
+ // to UINT32_MAX) and false is returned.
+ static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
+     const roaring_array_t *ra = &newit->parent->high_low_container;
+     const int32_t idx = newit->container_index;
+
+     newit->in_container_index = 0;
+     newit->run_index = 0;
+
+     if (idx < 0 || idx >= ra->size) {
+         newit->current_value = UINT32_MAX;
+         newit->has_value = false;
+         return false;
+     }
+     newit->current_value = 0;
+
+     // container, typecode and highbits are cached so that successive
+     // iterator calls do not have to fetch them again, nor repeat the
+     // (easily predicted) container_unwrap_shared call.
+     newit->typecode = ra->typecodes[idx];
+     newit->highbits = ((uint32_t)ra->keys[idx]) << 16;
+     newit->container =
+         container_unwrap_shared(ra->containers[idx], &(newit->typecode));
+
+     // assume the container is not empty
+     newit->has_value = true;
+     return true;
+ }
+
+ // Positions the iterator on the smallest value of the container designated
+ // by newit->container_index. Returns false when that index is out of range.
+ static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
+     if (!iter_new_container_partial_init(newit)) {
+         return newit->has_value;
+     }
+
+     if (newit->typecode == BITSET_CONTAINER_TYPE) {
+         const bitset_container_t *bits = const_CAST_bitset(newit->container);
+         uint32_t w = 0;
+
+         // skip leading all-zero words (the container is assumed non-empty)
+         while (bits->words[w] == 0) {
+             w++;
+         }
+         const uint64_t first_word = bits->words[w];  // non-zero here
+         newit->in_container_index = w * 64 + __builtin_ctzll(first_word);
+         newit->current_value = newit->highbits | newit->in_container_index;
+     } else if (newit->typecode == ARRAY_CONTAINER_TYPE) {
+         const array_container_t *arr = const_CAST_array(newit->container);
+         newit->current_value = newit->highbits | arr->array[0];
+     } else if (newit->typecode == RUN_CONTAINER_TYPE) {
+         const run_container_t *runs = const_CAST_run(newit->container);
+         newit->current_value = newit->highbits | runs->runs[0].value;
+     } else {
+         // if this ever happens, bug!
+         assert(false);
+     }
+     return true;
+ }
+
+ // Positions the iterator on the largest value of the container designated
+ // by newit->container_index. Returns false when that index is out of range.
+ static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
+     const bool in_range = iter_new_container_partial_init(newit);
+     if (!in_range) {
+         return newit->has_value;
+     }
+
+     switch (newit->typecode) {
+         case BITSET_CONTAINER_TYPE: {
+             const bitset_container_t* bits =
+                 (const bitset_container_t*)newit->container;
+             uint32_t w = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
+
+             // skip trailing all-zero words (the container is assumed non-empty)
+             while (bits->words[w] == 0) {
+                 --w;
+             }
+             const uint64_t last_word = bits->words[w];
+             const int leading_zeros = __builtin_clzll(last_word);
+
+             newit->in_container_index = (w * 64) + (63 - leading_zeros);
+             newit->current_value = newit->highbits | newit->in_container_index;
+             break;
+         }
+         case ARRAY_CONTAINER_TYPE: {
+             const array_container_t* arr =
+                 (const array_container_t*)newit->container;
+
+             newit->in_container_index = arr->cardinality - 1;
+             newit->current_value =
+                 newit->highbits | arr->array[newit->in_container_index];
+             break;
+         }
+         case RUN_CONTAINER_TYPE: {
+             const run_container_t* runs =
+                 (const run_container_t*)newit->container;
+
+             newit->run_index = runs->n_runs - 1;
+             const rle16_t* tail = &runs->runs[newit->run_index];
+             // last value of a run is value + length
+             newit->current_value = newit->highbits | (tail->value + tail->length);
+             break;
+         }
+         default:
+             // if this ever happens, bug!
+             assert(false);
+     }
+     return true;
+ }
+
+ // Positions the iterator on the smallest value >= val inside the container
+ // designated by newit->container_index.
+ // Prerequisite: val must be within the range of that container.
+ static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
+     const uint16_t low = val & 0xFFFF;
+
+     // The return value is deliberately ignored: the prerequisite guarantees
+     // a valid container.
+     iter_new_container_partial_init(newit);
+
+     switch (newit->typecode) {
+         case BITSET_CONTAINER_TYPE: {
+             const bitset_container_t *bits =
+                 const_CAST_bitset(newit->container);
+
+             newit->in_container_index =
+                 bitset_container_index_equalorlarger(bits, low);
+             newit->current_value = newit->highbits | newit->in_container_index;
+             break;
+         }
+         case ARRAY_CONTAINER_TYPE: {
+             const array_container_t *arr = const_CAST_array(newit->container);
+
+             newit->in_container_index =
+                 array_container_index_equalorlarger(arr, low);
+             newit->current_value =
+                 newit->highbits | arr->array[newit->in_container_index];
+             break;
+         }
+         case RUN_CONTAINER_TYPE: {
+             const run_container_t *runs = const_CAST_run(newit->container);
+
+             newit->run_index = run_container_index_equalorlarger(runs, low);
+             const uint16_t run_start = runs->runs[newit->run_index].value;
+             // the run either already covers val, or begins after it
+             newit->current_value =
+                 (run_start <= low) ? val : (newit->highbits | run_start);
+             break;
+         }
+         default:
+             __builtin_unreachable();
+     }
+
+     return true;
+ }
+
+ /**
+  * Initialise a caller-provided iterator over r, positioned on the first
+  * value (has_value is false when there is none).
+  */
+ void roaring_init_iterator(const roaring_bitmap_t *r,
+                            roaring_uint32_iterator_t *newit) {
+     newit->container_index = 0;
+     newit->parent = r;
+
+     const bool found = loadfirstvalue(newit);
+     newit->has_value = found;
+ }
+
+ /**
+  * Initialise a caller-provided iterator over r, positioned on the last
+  * value (has_value is false when there is none).
+  */
+ void roaring_init_iterator_last(const roaring_bitmap_t *r,
+                                 roaring_uint32_iterator_t *newit) {
+     newit->parent = r;
+     // index of the final container; -1 when r has no container
+     newit->container_index = r->high_low_container.size - 1;
+
+     const bool found = loadlastvalue(newit);
+     newit->has_value = found;
+ }
+
+ /**
+  * Allocate an iterator with roaring_malloc and initialise it on the first
+  * value of r. Returns NULL when the allocation fails.
+  */
+ roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) {
+     roaring_uint32_iterator_t *it = (roaring_uint32_iterator_t *)roaring_malloc(
+         sizeof(roaring_uint32_iterator_t));
+
+     if (it != NULL) {
+         roaring_init_iterator(r, it);
+     }
+     return it;
+ }
+
+ /**
+  * Create a byte-for-byte copy of an iterator, allocated with
+  * roaring_malloc. The copy refers to the same parent bitmap and sits at
+  * the same position.
+  * Returns NULL when the allocation fails, matching roaring_create_iterator.
+  */
+ roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+     const roaring_uint32_iterator_t *it) {
+     roaring_uint32_iterator_t *newit =
+         (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+     if (newit == NULL) {
+         // never hand a NULL destination to memcpy
+         return NULL;
+     }
+     memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
+     return newit;
+ }
+
+ /**
+  * Move the iterator to the first value >= val. Returns (and stores in
+  * it->has_value) whether such a value exists.
+  */
+ bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
+     const roaring_array_t *ra = &it->parent->high_low_container;
+     const uint16_t high = val >> 16;
+     const int idx = ra_get_index(ra, high);
+
+     if (idx < 0) {
+         // no container for this key: continue from the index decoded from
+         // the negative result
+         it->container_index = -idx - 1;
+     } else {
+         const uint32_t container_max =
+             container_maximum(ra->containers[idx], ra->typecodes[idx]);
+         const uint16_t low = val & 0xFFFF;
+
+         if (container_max >= low) {
+             // the value is necessarily within the range of the container
+             it->container_index = idx;
+             it->has_value = loadfirstvalue_largeorequal(it, val);
+             return it->has_value;
+         }
+         // everything in this container is smaller: use the next one
+         it->container_index = idx + 1;
+     }
+
+     it->has_value = loadfirstvalue(it);
+     return it->has_value;
+ }
+
+
+ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { // Moves `it` to the next value; returns (and stores in it->has_value) whether a value is available.
+ if (it->container_index >= it->parent->high_low_container.size) { // already past the last container
+ return (it->has_value = false);
+ }
+ if (it->container_index < 0) { // positioned before the first container: restart from the first value
+ it->container_index = 0;
+ return (it->has_value = loadfirstvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(it->container);
+ it->in_container_index++;
+
+ uint32_t wordindex = it->in_container_index / 64;
+ if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; // ran off the end of the bitset: go to the next container
+
+ uint64_t word = bc->words[wordindex] &
+ (UINT64_MAX << (it->in_container_index % 64)); // keep only the bits at or above the new position
+ // next part could be optimized/simplified
+ while ((word == 0) &&
+ (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ if (word != 0) {
+ it->in_container_index = wordindex * 64 + __builtin_ctzll(word); // lowest set bit of the word
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(it->container);
+ it->in_container_index++;
+ if (it->in_container_index < ac->cardinality) {
+ it->current_value =
+ it->highbits | ac->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == UINT32_MAX) { // avoid overflow to zero
+ return (it->has_value = false);
+ }
+
+ const run_container_t* rc = const_CAST_run(it->container);
+ uint32_t limit = (it->highbits | (rc->runs[it->run_index].value +
+ rc->runs[it->run_index].length)); // last value covered by the current run
+ if (++it->current_value <= limit) { // still inside the current run
+ return (it->has_value = true);
+ }
+
+ if (++it->run_index < rc->n_runs) { // Assume the run has a value
+ it->current_value =
+ it->highbits | rc->runs[it->run_index].value;
+ return (it->has_value = true);
+ }
+ break;
+ }
+
+ default:
+ __builtin_unreachable();
+ }
+
+ // moving to next container
+ it->container_index++;
+ return (it->has_value = loadfirstvalue(it));
+ }
+
+ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { // Moves `it` to the previous value; returns (and stores in it->has_value) whether a value is available.
+ if (it->container_index < 0) { // already before the first container
+ return (it->has_value = false);
+ }
+ if (it->container_index >= it->parent->high_low_container.size) { // positioned past the end: restart from the last value
+ it->container_index = it->parent->high_low_container.size - 1;
+ return (it->has_value = loadlastvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
+ int32_t wordindex = it->in_container_index / 64;
+ uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); // keep only the bits at or below the new position
+
+ while (word == 0 && --wordindex >= 0) {
+ word = bitset_container->words[wordindex];
+ }
+ if (word == 0)
+ break;
+
+ int num_leading_zeros = __builtin_clzll(word);
+ it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); // highest set bit of the word
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const array_container_t* array_container = (const array_container_t*)it->container;
+ it->current_value = it->highbits | array_container->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == 0) // avoid wrapping below zero
+ return (it->has_value = false);
+
+ const run_container_t* run_container = (const run_container_t*)it->container;
+ if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { // still inside the current run
+ return (it->has_value = true);
+ }
+
+ if (--it->run_index < 0)
+ break;
+
+ it->current_value = it->highbits | (run_container->runs[it->run_index].value +
+ run_container->runs[it->run_index].length); // last value of the preceding run
+ return (it->has_value = true);
+ }
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ } // switch (typecode)
+
+ // moving to previous container
+ it->container_index--;
+ return (it->has_value = loadlastvalue(it));
+ }
+
+ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { // Copies up to `count` values into `buf` starting at the iterator's current value, advancing the iterator; returns the number of values written.
+ uint32_t ret = 0;
+ uint32_t num_values;
+ uint32_t wordindex; // used for bitsets
+ uint64_t word; // used for bitsets
+ const array_container_t* acont; //TODO remove
+ const run_container_t* rcont; //TODO remove
+ const bitset_container_t* bcont; //TODO remove
+
+ while (it->has_value && ret < count) {
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE:
+ bcont = const_CAST_bitset(it->container);
+ wordindex = it->in_container_index / 64;
+ word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); // keep only the bits at or above the current position
+ do {
+ while (word != 0 && ret < count) {
+ buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
+ word = word & (word - 1); // clear the lowest set bit
+ buf++;
+ ret++;
+ }
+ while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ wordindex++;
+ word = bcont->words[wordindex];
+ }
+ } while (word != 0 && ret < count);
+ it->has_value = (word != 0); // no set bit left means this container is exhausted
+ if (it->has_value) {
+ it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+ it->current_value = it->highbits | it->in_container_index;
+ }
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ acont = const_CAST_array(it->container);
+ num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); // whatever is smaller: values left in the array or room left in buf
+ uint32_t i; for ( i = 0; i < num_values; i++) {
+ buf[i] = it->highbits | acont->array[it->in_container_index + i];
+ }
+ buf += num_values;
+ ret += num_values;
+ it->in_container_index += num_values;
+ it->has_value = (it->in_container_index < acont->cardinality);
+ if (it->has_value) {
+ it->current_value = it->highbits | acont->array[it->in_container_index];
+ }
+ break;
+ case RUN_CONTAINER_TYPE:
+ rcont = const_CAST_run(it->container);
+ //"in_run_index" name is misleading, read it as "max_value_in_current_run"
+ do {
+ uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
+ num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
+ uint32_t i; for ( i = 0; i < num_values; i++) {
+ buf[i] = it->current_value + i;
+ }
+ it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
+ buf += num_values;
+ ret += num_values;
+
+ if (it->current_value > largest_run_value || it->current_value == 0) { // current run consumed (the == 0 test catches the wrap-around)
+ it->run_index++;
+ if (it->run_index < rcont->n_runs) {
+ it->current_value = it->highbits | rcont->runs[it->run_index].value;
+ } else {
+ it->has_value = false;
+ }
+ }
+ } while ((ret < count) && it->has_value);
+ break;
+ default:
+ assert(false);
+ }
+ if (it->has_value) { // stopped inside a container, which only happens once buf is full
+ assert(ret == count);
+ return ret;
+ }
+ it->container_index++; // container exhausted: continue with the next one
+ it->has_value = loadfirstvalue(it);
+ }
+ return ret;
+ }
+
+
+
+ /** Release an iterator by passing it to roaring_free. */
+ void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) {
+     roaring_free(it);
+ }
+
+/****
+* end of roaring_uint32_iterator_t
+*****/
+
+ /**
+  * True when both bitmaps hold the same number of containers, under the
+  * same keys, and container_equals holds for every pair.
+  */
+ bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+                            const roaring_bitmap_t *r2) {
+     const roaring_array_t *lhs = &r1->high_low_container;
+     const roaring_array_t *rhs = &r2->high_low_container;
+
+     if (lhs->size != rhs->size) {
+         return false;
+     }
+     const int count = lhs->size;
+     int k;
+
+     // first pass: compare all the keys
+     for (k = 0; k < count; ++k) {
+         if (lhs->keys[k] != rhs->keys[k]) {
+             return false;
+         }
+     }
+
+     // second pass: compare the containers themselves
+     k = 0;
+     while (k < count) {
+         if (!container_equals(lhs->containers[k], lhs->typecodes[k],
+                               rhs->containers[k], rhs->typecodes[k])) {
+             return false;
+         }
+         ++k;
+     }
+     return true;
+ }
+
+ /**
+  * True when every value of r1 is also present in r2.
+  */
+ bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+                               const roaring_bitmap_t *r2) {
+     const roaring_array_t *sub = &r1->high_low_container;
+     const roaring_array_t *sup = &r2->high_low_container;
+     const int sub_len = sub->size;
+     const int sup_len = sup->size;
+     int i = 0, j = 0;
+
+     while (i < sub_len && j < sup_len) {
+         const uint16_t sub_key = ra_get_key_at_index(sub, i);
+         const uint16_t sup_key = ra_get_key_at_index(sup, j);
+
+         if (sub_key < sup_key) {
+             // r1 has a container under a key that r2 lacks
+             return false;
+         }
+         if (sub_key > sup_key) {
+             // skip the keys of r2 that r1 does not use
+             j = ra_advance_until(sup, sub_key, j);
+             continue;
+         }
+
+         uint8_t sub_type, sup_type;
+         container_t *sub_c = ra_get_container_at_index(sub, i, &sub_type);
+         container_t *sup_c = ra_get_container_at_index(sup, j, &sup_type);
+         if (!container_is_subset(sub_c, sub_type, sup_c, sup_type)) {
+             return false;
+         }
+         ++i;
+         ++j;
+     }
+
+     // a subset only if every container of r1 found its match
+     return i == sub_len;
+ }
+
+ // Computes, for key hb, the container of x1_arr with the bits in
+ // [lb_start, lb_end] negated and inserts it into ans_arr. A missing source
+ // container is treated as empty; an empty result is not inserted.
+ static void insert_flipped_container(roaring_array_t *ans_arr,
+                                      const roaring_array_t *x1_arr, uint16_t hb,
+                                      uint16_t lb_start, uint16_t lb_end) {
+     const int src_idx = ra_get_index(x1_arr, hb);
+     const int dst_idx = ra_get_index(ans_arr, hb);
+     const int insert_at = -dst_idx - 1;
+     // the container routines take a half-open [begin, end) range
+     const uint32_t begin = (uint32_t)lb_start;
+     const uint32_t end = (uint32_t)(lb_end + 1);
+     uint8_t type_in, type_out;
+
+     if (src_idx < 0) {
+         container_t *ones = container_range_of_ones(begin, end, &type_out);
+         ra_insert_new_key_value_at(ans_arr, insert_at, hb, ones, type_out);
+         return;
+     }
+
+     container_t *source = ra_get_container_at_index(x1_arr, src_idx, &type_in);
+     container_t *flipped =
+         container_not_range(source, type_in, begin, end, &type_out);
+
+     if (container_get_cardinality(flipped, type_out)) {
+         ra_insert_new_key_value_at(ans_arr, insert_at, hb, flipped, type_out);
+     } else {
+         container_free(flipped, type_out);
+     }
+ }
+
+ // Negates, in place, the bits in [lb_start, lb_end] of the container stored
+ // under key hb in x1_arr. A missing container is created as a range of
+ // ones; a container that becomes empty is removed from the array.
+ static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
+                                    uint16_t lb_start, uint16_t lb_end) {
+     const int idx = ra_get_index(x1_arr, hb);
+     // the container routines take a half-open [begin, end) range
+     const uint32_t begin = (uint32_t)lb_start;
+     const uint32_t end = (uint32_t)(lb_end + 1);
+     uint8_t type_in, type_out;
+
+     if (idx < 0) {
+         container_t *ones = container_range_of_ones(begin, end, &type_out);
+         ra_insert_new_key_value_at(x1_arr, -idx - 1, hb, ones, type_out);
+         return;
+     }
+
+     container_t *current = ra_get_container_at_index(x1_arr, idx, &type_in);
+     // if a new container was created, the old one was already freed
+     container_t *flipped =
+         container_inot_range(current, type_in, begin, end, &type_out);
+
+     if (container_get_cardinality(flipped, type_out) == 0) {
+         container_free(flipped, type_out);
+         ra_remove_at_index(x1_arr, idx);
+     } else {
+         ra_set_container_at_index(x1_arr, idx, flipped, type_out);
+     }
+ }
+
+ /*
+  * Inserts into ans_arr the complete negation (all 2^16 low values) of the
+  * container x1_arr holds under key `hb`.
+  *
+  * A missing source container yields a container of ones over [0, 0x10000).
+  * An empty negation is freed and not inserted. The insertion position is
+  * `-j - 1`, computed from ra_get_index() on ans_arr.
+  */
+ static void insert_fully_flipped_container(roaring_array_t *ans_arr,
+                                            const roaring_array_t *x1_arr,
+                                            uint16_t hb) {
+     const int src_idx = ra_get_index(x1_arr, hb);
+     const int dst_idx = ra_get_index(ans_arr, hb);
+     uint8_t src_type, out_type;
+
+     if (src_idx < 0) {
+         container_t *ones = container_range_of_ones(0U, 0x10000U, &out_type);
+         ra_insert_new_key_value_at(ans_arr, -dst_idx - 1, hb, ones, out_type);
+         return;
+     }
+
+     container_t *src = ra_get_container_at_index(x1_arr, src_idx, &src_type);
+     container_t *negated = container_not(src, src_type, &out_type);
+
+     if (!container_get_cardinality(negated, out_type)) {
+         container_free(negated, out_type);
+         return;
+     }
+     ra_insert_new_key_value_at(ans_arr, -dst_idx - 1, hb, negated, out_type);
+ }
+
+ /*
+  * Negates, in place, the whole container stored under key `hb` in x1_arr.
+  *
+  * Key absent: a container of ones over [0, 0x10000) is inserted.
+  * Key present: container_inot() is applied; a non-empty result is stored
+  * back in the same slot, an empty one is freed and the slot removed.
+  */
+ static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
+     const int idx = ra_get_index(x1_arr, hb);
+     uint8_t in_type, out_type;
+
+     if (idx < 0) {
+         container_t *ones = container_range_of_ones(0U, 0x10000U, &out_type);
+         ra_insert_new_key_value_at(x1_arr, -idx - 1, hb, ones, out_type);
+         return;
+     }
+
+     container_t *current = ra_get_container_at_index(x1_arr, idx, &in_type);
+     container_t *negated = container_inot(current, in_type, &out_type);
+
+     if (container_get_cardinality(negated, out_type)) {
+         ra_set_container_at_index(x1_arr, idx, negated, out_type);
+         return;
+     }
+     container_free(negated, out_type);
+     ra_remove_at_index(x1_arr, idx);
+ }
+
+ /*
+  * Returns a new bitmap equal to x1 with every value in
+  * [range_start, range_end) negated. x1 itself is not modified.
+  *
+  * range_end is clamped to 2^32. An empty range (after clamping) yields a
+  * plain copy of x1. The clamp is applied BEFORE the emptiness test: with the
+  * opposite order a range starting at or above 2^32 passed the test, its
+  * start was then truncated to 16+16 bits below, and unrelated values were
+  * flipped.
+  *
+  * The range is processed per 16-bit high key: a partial first container, a
+  * run of fully flipped containers, and a partial last container. Containers
+  * outside the range are copied over by ra_append_copies_until() /
+  * ra_append_copies_after().
+  */
+ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
+                                       uint64_t range_start,
+                                       uint64_t range_end) {
+     if (range_end >= UINT64_C(0x100000000)) {
+         range_end = UINT64_C(0x100000000);
+     }
+     if (range_start >= range_end) {
+         return roaring_bitmap_copy(x1);  // empty range
+     }
+
+     roaring_bitmap_t *ans = roaring_bitmap_create();
+     roaring_bitmap_set_copy_on_write(ans, is_cow(x1));
+
+     // Split both (inclusive) endpoints into high key / low value.
+     uint16_t hb_start = (uint16_t)(range_start >> 16);
+     const uint16_t lb_start = (uint16_t)range_start;  // & 0xFFFF;
+     uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+     const uint16_t lb_end = (uint16_t)(range_end - 1);  // & 0xFFFF;
+
+     ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
+                            hb_start, is_cow(x1));
+     if (hb_start == hb_end) {
+         insert_flipped_container(&ans->high_low_container,
+                                  &x1->high_low_container, hb_start, lb_start,
+                                  lb_end);
+     } else {
+         // start and end containers are distinct
+         if (lb_start > 0) {
+             // handle first (partial) container
+             insert_flipped_container(&ans->high_low_container,
+                                      &x1->high_low_container, hb_start,
+                                      lb_start, 0xFFFF);
+             ++hb_start;  // for the full containers.  Can't wrap.
+         }
+
+         // hb_end > hb_start >= 0 here, so the decrement cannot wrap.
+         if (lb_end != 0xFFFF) --hb_end;  // later we'll handle the partial block
+
+         // 32-bit counter so that hb_end == 0xFFFF still terminates the loop.
+         uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) {
+             insert_fully_flipped_container(&ans->high_low_container,
+                                            &x1->high_low_container, hb);
+         }
+
+         // handle a partial final container
+         if (lb_end != 0xFFFF) {
+             insert_flipped_container(&ans->high_low_container,
+                                      &x1->high_low_container, hb_end + 1, 0,
+                                      lb_end);
+             ++hb_end;
+         }
+     }
+     ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
+                            hb_end, is_cow(x1));
+     return ans;
+ }
+
+ /*
+  * Negates, in place, every value of x1 lying in [range_start, range_end).
+  *
+  * range_end is clamped to 2^32 and the emptiness test is performed AFTER the
+  * clamp. Otherwise a range starting at or above 2^32 would survive the test
+  * and, once truncated to a 16-bit key / 16-bit low value, flip unrelated
+  * values.
+  *
+  * Work is split per 16-bit high key: partial first container, fully flipped
+  * middle containers, partial last container.
+  */
+ void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
+                                  uint64_t range_end) {
+     if (range_end >= UINT64_C(0x100000000)) {
+         range_end = UINT64_C(0x100000000);
+     }
+     if (range_start >= range_end) {
+         return;  // empty range
+     }
+
+     // Split both (inclusive) endpoints into high key / low value.
+     uint16_t hb_start = (uint16_t)(range_start >> 16);
+     const uint16_t lb_start = (uint16_t)range_start;
+     uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+     const uint16_t lb_end = (uint16_t)(range_end - 1);
+
+     if (hb_start == hb_end) {
+         inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+                                lb_end);
+     } else {
+         // start and end containers are distinct
+         if (lb_start > 0) {
+             // handle first (partial) container
+             inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+                                    0xFFFF);
+             ++hb_start;  // for the full containers.  Can't wrap.
+         }
+
+         // hb_end > hb_start >= 0 here, so the decrement cannot wrap.
+         if (lb_end != 0xFFFF) --hb_end;
+
+         // 32-bit counter so that hb_end == 0xFFFF still terminates the loop.
+         uint32_t hb; for (hb = hb_start; hb <= hb_end; ++hb) {
+             inplace_fully_flip_container(&x1->high_low_container, hb);
+         }
+         // handle a partial final container
+         if (lb_end != 0xFFFF) {
+             inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
+                                    lb_end);
+             ++hb_end;
+         }
+     }
+ }
+
+ /*
+  * Appends container `c` (type `t`) under key `k` to `ra`. If the last entry
+  * of `ra` already has key `k`, `c` is OR-ed into that entry instead and `c`
+  * is freed.
+  */
+ static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) {
+     const int count = ra_get_size(ra);
+     const int tail = count - 1;
+     const bool same_key_as_tail =
+         (count != 0) && (ra_get_key_at_index(ra, tail) == k);
+
+     if (!same_key_as_tail) {
+         // No merge.
+         ra_append(ra, k, c, t);
+         return;
+     }
+
+     // NOTE: we don't need to unwrap here, since we added the tail container
+     // ourselves we have the certainty it's not a shared container.
+     // The same applies to c, as it's the result of calling container_offset.
+     uint8_t tail_type, merged_type;
+     container_t *tail_c = ra_get_container_at_index(ra, tail, &tail_type);
+     container_t *merged = container_ior(tail_c, tail_type, c, t, &merged_type);
+
+     ra_set_container_at_index(ra, tail, merged, merged_type);
+
+     // Comparison of pointers of different origin is UB (or so claim some
+     // compiler makers), so we compare their bit representation only.
+     if ((uintptr_t)merged != (uintptr_t)tail_c) {
+         // container_ior produced a fresh container; drop the old tail.
+         container_free(tail_c, tail_type);
+     }
+     container_free(c, t);
+ }
+
+ // roaring_bitmap_add_offset adds the value 'offset' to each and every value in
+ // a bitmap, generating a new bitmap in the process. If offset + element is
+ // outside of the range [0,2^32), then the element will be dropped.
+ // We need "offset" to be 64 bits because we want to support values
+ // between -0xFFFFFFFF up to +0xFFFFFFFF.
+ //
+ // The offset is split into a whole-container shift (container_offset, added
+ // to each 16-bit key) and a remainder in_offset in [0, 0xFFFF] applied
+ // inside containers. An offset of 0 returns a plain copy of bm.
+ roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+ int64_t offset) {
+ roaring_bitmap_t *answer;
+ roaring_array_t *ans_ra;
+ int64_t container_offset;
+ uint16_t in_offset;
+
+ const roaring_array_t *bm_ra = &bm->high_low_container;
+ int length = bm_ra->size;
+
+ if (offset == 0) {
+ return roaring_bitmap_copy(bm);
+ }
+
+ // NOTE(review): relies on >> of a negative int64_t being an arithmetic
+ // shift (implementation-defined in C) -- confirm for all target compilers.
+ container_offset = offset >> 16;
+ in_offset = (uint16_t)(offset - container_offset * (1 << 16));
+
+ answer = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(answer, is_cow(bm));
+
+ ans_ra = &answer->high_low_container;
+
+ // Fast path: the offset is a multiple of 2^16, so containers are copied
+ // unchanged and only their keys are shifted.
+ if (in_offset == 0) {
+ ans_ra = &answer->high_low_container;
+
+ // i walks the source entries, j counts the entries kept in the answer.
+ int i, j; for (i = 0, j = 0; i < length; ++i) {
+ int64_t key = ra_get_key_at_index(bm_ra, i);
+ key += container_offset;
+
+ // Shifted key falls outside the 16-bit key space: drop the container.
+ if (key < 0 || key >= (1 << 16)) {
+ continue;
+ }
+
+ // Append the copy, then overwrite the key of the entry just appended
+ // (index j) with the shifted key.
+ ra_append_copy(ans_ra, bm_ra, i, false);
+ ans_ra->keys[j++] = key;
+ }
+
+ return answer;
+ }
+
+ uint8_t t;
+ const container_t *c;
+ container_t *lo, *hi, **lo_ptr, **hi_ptr;
+ int64_t k;
+
+ // General path: each source container may contribute to key k ("lo") and
+ // to key k+1 ("hi").
+ int i; for (i = 0; i < length; ++i) {
+ lo = hi = NULL;
+ lo_ptr = hi_ptr = NULL;
+
+ k = ra_get_key_at_index(bm_ra, i)+container_offset;
+ // Only request the halves whose destination key is representable.
+ if (k >= 0 && k < (1 << 16)) {
+ lo_ptr = &lo;
+ }
+ if (k+1 >= 0 && k+1 < (1 << 16)) {
+ hi_ptr = &hi;
+ }
+ if (lo_ptr == NULL && hi_ptr == NULL) {
+ continue;
+ }
+
+ c = ra_get_container_at_index(bm_ra, i, &t);
+ c = container_unwrap_shared(c, &t);
+
+ // presumably container_add_offset leaves *lo_ptr / *hi_ptr NULL when the
+ // corresponding half is empty or not requested -- TODO confirm.
+ container_add_offset(c, t, lo_ptr, hi_ptr, in_offset);
+ if (lo != NULL) {
+ // Key k may already hold the "hi" half of the previous source
+ // container, hence the merge.
+ offset_append_with_merge(ans_ra, k, lo, t);
+ }
+ if (hi != NULL) {
+ ra_append(ans_ra, k+1, hi, t);
+ }
+ }
+
+ return answer;
+ }
+
+ // Computes the union of x1 and x2 into a newly created bitmap using the
+ // "lazy" container operations (container_lazy_or / container_lazy_ior).
+ // NOTE(review): the result presumably needs roaring_bitmap_repair_after_lazy()
+ // before it is used as a regular bitmap -- confirm against the public header.
+ //
+ // bitsetconversion: when true and neither of two same-key containers is a
+ // bitset, the first one is converted to a bitset before the in-place lazy OR.
+ //
+ // If either input has no containers, a copy of the other is returned.
+ // Note that when an input is copy-on-write, its container slots are updated
+ // through ra_set_container_at_index() even though the parameter is const.
+ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ // Merge the two key-ordered container arrays; the loop ends as soon as
+ // either input is exhausted.
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c;
+ if (bitsetconversion &&
+ (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
+ (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)
+ ){
+ // Convert c1 to a bitset, then OR c2 into that bitset in place.
+ container_t *newc1 =
+ container_mutable_unwrap_shared(c1, &type1);
+ newc1 = container_to_bitset(newc1, type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ c = container_lazy_ior(newc1, type1, c2, type2,
+ &result_type);
+ if (c != newc1) { // should not happen
+ container_free(newc1, type1);
+ }
+ } else {
+ c = container_lazy_or(c1, type1, c2, type2, &result_type);
+ }
+ // since we assume that the initial containers are non-empty,
+ // the
+ // result here
+ // can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ // Key only in x1: append a copy of its container.
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ // Store the (possibly re-wrapped) container back into x1.
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ // Key only in x2: append a copy of its container.
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ // Store the (possibly re-wrapped) container back into x2.
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ // Copy over whatever remains of the input that was not exhausted.
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+ }
+
+ // In-place variant of the lazy union: ORs x2 into x1 using
+ // container_lazy_ior().
+ // NOTE(review): x1 presumably needs roaring_bitmap_repair_after_lazy()
+ // afterwards -- confirm against the public header.
+ //
+ // bitsetconversion: when true, a non-bitset container of x1 that has a
+ // same-key partner in x2 is first converted to a bitset.
+ //
+ // An x2 without containers is a no-op; an x1 without containers is
+ // overwritten with x2. When x2 is copy-on-write its container slots are
+ // updated even though the parameter is const.
+ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ // length1 is not const: it grows when containers of x2 are inserted into x1.
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ // A full container cannot gain anything from a union; leave it as is.
+ if (!container_is_full(c1, type1)) {
+ if ((bitsetconversion == false) ||
+ (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)
+ ){
+ c1 = get_writable_copy_if_shared(c1, &type1);
+ } else {
+ // convert to bitset
+ container_t *old_c1 = c1;
+ uint8_t old_type1 = type1;
+ c1 = container_mutable_unwrap_shared(c1, &type1);
+ c1 = container_to_bitset(c1, type1);
+ container_free(old_c1, old_type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ }
+
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_ior(c1, type1, c2, type2,
+ &result_type);
+
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ // Key only in x1: nothing to do, the container stays.
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ // Key only in x2: insert a copy of its container into x1 at pos1.
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ // Step past the inserted entry; s1 still names the entry now at pos1.
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ // x1 exhausted first: append the remaining containers of x2.
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+ }
+
+ // Computes the symmetric difference of x1 and x2 into a newly created bitmap
+ // using container_lazy_xor().
+ // NOTE(review): the result presumably needs roaring_bitmap_repair_after_lazy()
+ // before regular use -- confirm against the public header.
+ //
+ // If either input has no containers, a copy of the other is returned.
+ // When an input is copy-on-write, its container slots are updated through
+ // ra_set_container_at_index() even though the parameter is const.
+ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ // Merge the two key-ordered container arrays; the loop ends as soon as
+ // either input is exhausted.
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_xor(
+ c1, type1, c2, type2, &result_type);
+
+ // Unlike OR, XOR of two non-empty containers can be empty; such a
+ // result is discarded rather than stored.
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ // Key only in x1: append a copy of its container.
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ // Key only in x2: append a copy of its container.
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ // Copy over whatever remains of the input that was not exhausted.
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+ }
+
+ // In-place variant of the lazy symmetric difference: XORs x2 into x1.
+ // x1 and x2 must be distinct objects (asserted).
+ // NOTE(review): x1 presumably needs roaring_bitmap_repair_after_lazy()
+ // afterwards -- confirm against the public header.
+ //
+ // An x2 without containers is a no-op; an x1 without containers is
+ // overwritten with x2. When x2 is copy-on-write its container slots are
+ // updated even though the parameter is const.
+ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ // length1 is not const: it changes as containers are inserted or removed.
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_lazy_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ // Empty result: drop the slot. pos1 is NOT advanced because the
+ // next entry has shifted down into this position.
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ // Key only in x1: nothing to do, the container stays.
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ // Key only in x2: insert a copy of its container into x1 at pos1.
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ // Step past the inserted entry; s1 still names the entry now at pos1.
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ // x1 exhausted first: append the remaining containers of x2.
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+ }
+
+ /*
+  * Runs container_repair_after_lazy() on every container of `r`, storing the
+  * returned container and type code back into the same slot.
+  */
+ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
+     roaring_array_t *hlc = &r->high_low_container;
+     int slot = 0;
+
+     while (slot < hlc->size) {
+         // The repair may change both the container pointer and its type.
+         uint8_t type = hlc->typecodes[slot];
+         container_t *repaired =
+             container_repair_after_lazy(hlc->containers[slot], &type);
+         hlc->containers[slot] = repaired;
+         hlc->typecodes[slot] = type;
+         ++slot;
+     }
+ }
+
+
+
+/**
+* roaring_bitmap_rank returns the number of integers that are smaller or equal
+* to x.
+*/
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
+ uint64_t size = 0;
+ uint32_t xhigh = x >> 16;
+ int i; for (i = 0; i < bm->high_low_container.size; i++) {
+ uint32_t key = bm->high_low_container.keys[i];
+ if (xhigh > key) {
+ size +=
+ container_get_cardinality(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i]);
+ } else if (xhigh == key) {
+ return size + container_rank(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i],
+ x & 0xFFFF);
+ } else {
+ return size;
+ }
+ }
+ return size;
+}
+
+/**
+* roaring_bitmap_smallest returns the smallest value in the set.
+* Returns UINT32_MAX if the set is empty.
+*/
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *c = bm->high_low_container.containers[0];
+ uint8_t type = bm->high_low_container.typecodes[0];
+ uint32_t key = bm->high_low_container.keys[0];
+ uint32_t lowvalue = container_minimum(c, type);
+ return lowvalue | (key << 16);
+ }
+ return UINT32_MAX;
+}
+
+/**
+* roaring_bitmap_smallest returns the greatest value in the set.
+* Returns 0 if the set is empty.
+*/
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *container =
+ bm->high_low_container.containers[bm->high_low_container.size - 1];
+ uint8_t typecode =
+ bm->high_low_container.typecodes[bm->high_low_container.size - 1];
+ uint32_t key =
+ bm->high_low_container.keys[bm->high_low_container.size - 1];
+ uint32_t lowvalue = container_maximum(container, typecode);
+ return lowvalue | (key << 16);
+ }
+ return 0;
+}
+
+ /*
+  * Looks up the element of given `rank` by offering each container, in key
+  * order, to container_select() together with a running start rank.
+  *
+  * On success the container's key is OR-ed into the high 16 bits of *element
+  * and true is returned. If no container reports success, false is returned.
+  * NOTE(review): rank is presumably zero-based -- confirm against the public
+  * header.
+  */
+ bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
+                            uint32_t *element) {
+     const roaring_array_t *hlc = &bm->high_low_container;
+     uint32_t start_rank = 0;
+     int idx;
+
+     for (idx = 0; idx < hlc->size; idx++) {
+         if (container_select(hlc->containers[idx], hlc->typecodes[idx],
+                              &start_rank, rank, element)) {
+             const uint16_t key = hlc->keys[idx];
+             *element |= (((uint32_t)key) << 16);  // w/o cast, key promotes signed
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /*
+  * Returns true as soon as a pair of same-key containers of x1 and x2 is
+  * found for which container_intersect() reports an intersection; returns
+  * false when either container array is exhausted without such a pair.
+  */
+ bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
+                               const roaring_bitmap_t *x2) {
+     const roaring_array_t *ra1 = &x1->high_low_container;
+     const roaring_array_t *ra2 = &x2->high_low_container;
+     const int n1 = ra1->size;
+     const int n2 = ra2->size;
+     int i1 = 0, i2 = 0;
+
+     while (i1 < n1 && i2 < n2) {
+         const uint16_t k1 = ra_get_key_at_index(ra1, i1);
+         const uint16_t k2 = ra_get_key_at_index(ra2, i2);
+
+         if (k1 < k2) {
+             // Skip ahead in x1 to the first key >= k2.
+             i1 = ra_advance_until(ra1, k2, i1);
+             continue;
+         }
+         if (k1 > k2) {
+             // Skip ahead in x2 to the first key >= k1.
+             i2 = ra_advance_until(ra2, k1, i2);
+             continue;
+         }
+
+         uint8_t t1, t2;
+         container_t *c1 = ra_get_container_at_index(ra1, i1, &t1);
+         container_t *c2 = ra_get_container_at_index(ra2, i2, &t2);
+         if (container_intersect(c1, t1, c2, t2)) {
+             return true;
+         }
+         ++i1;
+         ++i2;
+     }
+     return false;
+ }
+
+ /*
+  * Returns true if the bitmap contains at least one value in [x, y).
+  *
+  * An empty range (x >= y) never intersects. A range starting above
+  * UINT32_MAX cannot intersect either, since the bitmap only holds 32-bit
+  * values; it is rejected up front because the iterator seek below takes a
+  * 32-bit value and would otherwise silently truncate x.
+  */
+ bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+                                          uint64_t x, uint64_t y) {
+     if (x >= y) {
+         // Empty range.
+         return false;
+     }
+     if (x > (uint64_t)UINT32_MAX) {
+         // No 32-bit value can be >= x.
+         return false;
+     }
+     roaring_uint32_iterator_t it;
+     roaring_init_iterator(bm, &it);
+     if (!roaring_move_uint32_iterator_equalorlarger(&it, (uint32_t)x)) {
+         // No values above x.
+         return false;
+     }
+     if (it.current_value >= y) {
+         // No values below y.
+         return false;
+     }
+     return true;
+ }
+
+
+ /*
+  * Returns the sum of container_and_cardinality() over all pairs of
+  * containers of x1 and x2 that share a key, i.e. the size of the
+  * intersection, without building it.
+  */
+ uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
+                                         const roaring_bitmap_t *x2) {
+     const roaring_array_t *ra1 = &x1->high_low_container;
+     const roaring_array_t *ra2 = &x2->high_low_container;
+     const int n1 = ra1->size;
+     const int n2 = ra2->size;
+     uint64_t total = 0;
+     int i1 = 0, i2 = 0;
+
+     while (i1 < n1 && i2 < n2) {
+         const uint16_t k1 = ra_get_key_at_index(ra1, i1);
+         const uint16_t k2 = ra_get_key_at_index(ra2, i2);
+
+         if (k1 < k2) {
+             // Skip ahead in x1 to the first key >= k2.
+             i1 = ra_advance_until(ra1, k2, i1);
+             continue;
+         }
+         if (k1 > k2) {
+             // Skip ahead in x2 to the first key >= k1.
+             i2 = ra_advance_until(ra2, k1, i2);
+             continue;
+         }
+
+         uint8_t t1, t2;
+         container_t *c1 = ra_get_container_at_index(ra1, i1, &t1);
+         container_t *c2 = ra_get_container_at_index(ra2, i2, &t2);
+         total += container_and_cardinality(c1, t1, c2, t2);
+         ++i1;
+         ++i2;
+     }
+     return total;
+ }
+
+ /*
+  * Returns |x1 AND x2| / |x1 OR x2|, with the union size computed as
+  * card(x1) + card(x2) - card(x1 AND x2).
+  * No special case is made for a zero denominator (both cardinalities 0):
+  * the quotient is then 0.0 / 0.0.
+  */
+ double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
+                                     const roaring_bitmap_t *x2) {
+     const uint64_t card1 = roaring_bitmap_get_cardinality(x1);
+     const uint64_t card2 = roaring_bitmap_get_cardinality(x2);
+     const uint64_t common = roaring_bitmap_and_cardinality(x1, x2);
+     const uint64_t united = card1 + card2 - common;
+     return (double)common / (double)united;
+ }
+
+ /*
+  * Returns the size of the union of x1 and x2, computed by
+  * inclusion-exclusion from the two cardinalities and the intersection size.
+  */
+ uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
+                                        const roaring_bitmap_t *x2) {
+     const uint64_t card1 = roaring_bitmap_get_cardinality(x1);
+     const uint64_t card2 = roaring_bitmap_get_cardinality(x2);
+     const uint64_t common = roaring_bitmap_and_cardinality(x1, x2);
+     const uint64_t both = card1 + card2;
+     return both - common;
+ }
+
+ /*
+  * Returns the number of values of x1 that are not in x2, computed as
+  * card(x1) minus the intersection size.
+  */
+ uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
+                                            const roaring_bitmap_t *x2) {
+     const uint64_t card1 = roaring_bitmap_get_cardinality(x1);
+     const uint64_t common = roaring_bitmap_and_cardinality(x1, x2);
+     const uint64_t only_in_x1 = card1 - common;
+     return only_in_x1;
+ }
+
+ /*
+  * Returns the size of the symmetric difference of x1 and x2: the values
+  * only in x1 plus the values only in x2.
+  */
+ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
+                                         const roaring_bitmap_t *x2) {
+     const uint64_t card1 = roaring_bitmap_get_cardinality(x1);
+     const uint64_t card2 = roaring_bitmap_get_cardinality(x2);
+     const uint64_t common = roaring_bitmap_and_cardinality(x1, x2);
+     // (card1 - common) + (card2 - common) == card1 + card2 - 2 * common
+     // in unsigned 64-bit arithmetic.
+     return (card1 - common) + (card2 - common);
+ }
+
+
+ /*
+  * Returns true if `val` is in the bitmap: the high 16 bits select the
+  * container, the low 16 bits are looked up inside it.
+  */
+ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
+     const roaring_array_t *hlc = &r->high_low_container;
+     const uint16_t high = val >> 16;
+
+     /*
+      * the next function call involves a binary search and lots of branching.
+      */
+     const int32_t idx = ra_get_index(hlc, high);
+     if (idx >= 0) {
+         uint8_t type;
+         // next call ought to be cheap
+         container_t *c = ra_get_container_at_index(hlc, idx, &type);
+         // rest might be a tad expensive, possibly involving another round of
+         // binary search
+         return container_contains(c, val & 0xFFFF, type);
+     }
+     return false;
+ }
+
+
+/**
+ * Check whether a range of values from range_start (included) to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+ if (range_start >= range_end) return true; // empty range are always contained!
+ if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
+ uint16_t hb_rs = (uint16_t)(range_start >> 16);
+ uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
+ const int32_t span = hb_re - hb_rs;
+ const int32_t hlc_sz = ra_get_size(&r->high_low_container);
+ if (hlc_sz < span + 1) {
+ return false;
+ }
+ int32_t is = ra_get_index(&r->high_low_container, hb_rs);
+ int32_t ie = ra_get_index(&r->high_low_container, hb_re);
+ ie = (ie < 0 ? -ie - 1 : ie);
+ if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+ return false;
+ }
+ const uint32_t lb_rs = range_start & 0xFFFF;
+ const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
+ uint8_t type;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, is,
+ &type);
+ if (hb_rs == hb_re) {
+ return container_contains_range(c, lb_rs, lb_re, type);
+ }
+ if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
+ return false;
+ }
+ c = ra_get_container_at_index(&r->high_low_container, ie, &type);
+ if (!container_contains_range(c, 0, lb_re, type)) {
+ return false;
+ }
+ int32_t i; for (i = is + 1; i < ie; ++i) {
+ c = ra_get_container_at_index(&r->high_low_container, i, &type);
+ if (!container_is_full(c, type) ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+ /*
+  * Returns true if r1 is a subset of r2 and r2 has strictly more elements.
+  * The cardinality comparison is evaluated first; the subset test only runs
+  * when it succeeds.
+  */
+ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+                                      const roaring_bitmap_t *r2) {
+     if (!(roaring_bitmap_get_cardinality(r2) >
+           roaring_bitmap_get_cardinality(r1))) {
+         return false;
+     }
+     return roaring_bitmap_is_subset(r1, r2);
+ }
+
+
+/*
+ * FROZEN SERIALIZATION FORMAT DESCRIPTION
+ *
+ * -- (beginning must be aligned by 32 bytes) --
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
+ * <run_data> rle16_t[total number of rle elements in all run containers]
+ * <array_data> uint16_t[total number of array elements in all array containers]
+ * <keys> uint16_t[num_containers]
+ * <counts> uint16_t[num_containers]
+ * <typecodes> uint8_t[num_containers]
+ * <header> uint32_t
+ *
+ * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
+ * and the number of containers (17 bits).
+ *
+ * <counts> stores number of elements for every container.
+ * Its meaning depends on container type.
+ * For array and bitset containers, this value is the container cardinality minus one.
+ * For run container, it is the number of rle_t elements (n_runs).
+ *
+ * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
+ * all containers of respective type.
+ *
+ * <*_data> and <keys> are kept close together because they are not accessed
+ * during deserialization. This may reduce IO in case of large mmapped bitmaps.
+ * All members have their native alignments during deserialization except <header>,
+ * which is not guaranteed to be aligned by 4 bytes.
+ */
+
+ /*
+  * Returns the number of bytes the frozen serialization of `rb` occupies:
+  * the payload of every container, plus 5 bytes per container
+  * (2-byte key, 2-byte count, 1-byte typecode), plus the 4-byte header.
+  */
+ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
+     const roaring_array_t *hlc = &rb->high_low_container;
+     size_t total = 0;
+     int32_t idx;
+
+     for (idx = 0; idx < hlc->size; idx++) {
+         const uint8_t type = hlc->typecodes[idx];
+         if (type == BITSET_CONTAINER_TYPE) {
+             // Bitsets always have the same fixed size.
+             total += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+         } else if (type == RUN_CONTAINER_TYPE) {
+             const run_container_t *runs = const_CAST_run(hlc->containers[idx]);
+             total += runs->n_runs * sizeof(rle16_t);
+         } else if (type == ARRAY_CONTAINER_TYPE) {
+             const array_container_t *arr =
+                 const_CAST_array(hlc->containers[idx]);
+             total += arr->cardinality * sizeof(uint16_t);
+         } else {
+             __builtin_unreachable();
+         }
+     }
+     total += (2 + 2 + 1) * hlc->size;  // keys, counts, typecodes
+     total += 4;                        // header
+     return total;
+ }
+
+ /*
+  * Bump allocator: returns the current position of *arena and advances
+  * *arena by num_bytes. No bounds or alignment checks are performed.
+  */
+ inline static void *arena_alloc(char **arena, size_t num_bytes) {
+     char *const start = *arena;
+     *arena = start + num_bytes;
+     return start;
+ }
+
+ // Writes the frozen serialization of `rb` into `buf`.
+ // The zones are written in this order: bitset data, run data, array data,
+ // keys, counts, typecodes, header. No size is passed in and no bounds are
+ // checked here.
+ // NOTE(review): buf is presumably expected to hold at least
+ // roaring_bitmap_frozen_size_in_bytes(rb) bytes -- confirm against the
+ // public header.
+ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
+ /*
+ * Note: we do not require user to supply a specifically aligned buffer.
+ * Thus we have to use memcpy() everywhere.
+ */
+
+ const roaring_array_t *ra = &rb->high_low_container;
+
+ // Pass 1: total byte size of each data zone, so the zones can be laid out
+ // back to back before anything is copied.
+ size_t bitset_zone_size = 0;
+ size_t run_zone_size = 0;
+ size_t array_zone_size = 0;
+ int32_t i; for (i = 0; i < ra->size; i++) {
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_zone_size +=
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ run_zone_size += rc->n_runs * sizeof(rle16_t);
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ array_zone_size += ac->cardinality * sizeof(uint16_t);
+ break;
+ }
+ default:
+ __builtin_unreachable();
+ }
+ }
+
+ // Carve the output buffer into consecutive zones; each call advances buf.
+ uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
+ rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
+ uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
+ uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
+ uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
+
+ // Pass 2: copy each container's payload into its zone and record its count.
+ for (i = 0; i < ra->size; i++) {
+ uint16_t count;
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc =
+ const_CAST_bitset(ra->containers[i]);
+ memcpy(bitset_zone, bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+ bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+ // Stored count is cardinality - 1; recompute the cardinality if the
+ // container does not currently know it.
+ if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+ count = bc->cardinality - 1;
+ } else {
+ count = bitset_container_compute_cardinality(bc) - 1;
+ }
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ size_t num_bytes = rc->n_runs * sizeof(rle16_t);
+ memcpy(run_zone, rc->runs, num_bytes);
+ run_zone += rc->n_runs;
+ // For run containers the stored count is the number of runs.
+ count = rc->n_runs;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ size_t num_bytes = ac->cardinality * sizeof(uint16_t);
+ memcpy(array_zone, ac->array, num_bytes);
+ array_zone += ac->cardinality;
+ // Stored count is cardinality - 1.
+ count = ac->cardinality - 1;
+ break;
+ }
+ default:
+ __builtin_unreachable();
+ }
+ memcpy(&count_zone[i], &count, 2);
+ }
+ memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
+ memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
+ // Header: container count in the upper 17 bits, FROZEN_COOKIE in the
+ // lower 15 bits.
+ uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
+ memcpy(header_zone, &header, 4);
+ }
+
+/**
+ * Build a read-only bitmap that points directly into a buffer written in the
+ * "frozen" layout; container payloads, keys and typecodes are not copied.
+ *
+ * Layout, as parsed below (front to back): bitset zone, run zone, array zone,
+ * keys (2 bytes per container), counts (2 bytes per container), typecodes
+ * (1 byte per container) and a trailing 4-byte header that stores
+ * FROZEN_COOKIE in its low 15 bits and the container count above them.
+ *
+ * @param buf     start of the frozen image; must be 32-byte aligned
+ * @param length  exact size of the image in bytes
+ * @return a bitmap flagged ROARING_FLAG_FROZEN that aliases `buf`, or NULL
+ *         when the alignment, cookie, typecodes or total length do not match,
+ *         or when the bookkeeping allocation fails.
+ */
+const roaring_bitmap_t *
+roaring_bitmap_frozen_view(const char *buf, size_t length) {
+    if ((uintptr_t)buf % 32 != 0) {
+        return NULL;
+    }
+
+    // cookie and num_containers
+    if (length < 4) {
+        return NULL;
+    }
+    uint32_t header;
+    memcpy(&header, buf + length - 4, 4); // header may be misaligned
+    if ((header & 0x7FFF) != FROZEN_COOKIE) {
+        return NULL;
+    }
+    int32_t num_containers = (header >> 15);
+
+    // typecodes, counts and keys
+    if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
+        return NULL;
+    }
+    uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
+    uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
+    uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
+
+    // {bitset,array,run}_zone
+    int32_t num_bitset_containers = 0;
+    int32_t num_run_containers = 0;
+    int32_t num_array_containers = 0;
+    size_t bitset_zone_size = 0;
+    size_t run_zone_size = 0;
+    size_t array_zone_size = 0;
+    int32_t i;
+    for (i = 0; i < num_containers; i++) {
+        switch (typecodes[i]) {
+            case BITSET_CONTAINER_TYPE:
+                num_bitset_containers++;
+                bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+                break;
+            case RUN_CONTAINER_TYPE:
+                num_run_containers++;
+                // for runs, the count field is the number of runs
+                run_zone_size += counts[i] * sizeof(rle16_t);
+                break;
+            case ARRAY_CONTAINER_TYPE:
+                num_array_containers++;
+                // for arrays, the count field is cardinality - 1
+                array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
+                break;
+            default:
+                return NULL;
+        }
+    }
+    // The zones plus the trailer must account for every byte of the image.
+    if (length != bitset_zone_size + run_zone_size + array_zone_size +
+                  5 * num_containers + 4) {
+        return NULL;
+    }
+    uint64_t *bitset_zone = (uint64_t*) (buf);
+    rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
+    uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
+
+    // One allocation holds the bitmap struct, the container pointer table
+    // and every container descriptor.
+    size_t alloc_size = 0;
+    alloc_size += sizeof(roaring_bitmap_t);
+    alloc_size += num_containers * sizeof(container_t*);
+    alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+    alloc_size += num_run_containers * sizeof(run_container_t);
+    alloc_size += num_array_containers * sizeof(array_container_t);
+
+    char *arena = (char *)roaring_malloc(alloc_size);
+    if (arena == NULL) {
+        return NULL;
+    }
+
+    roaring_bitmap_t *rb = (roaring_bitmap_t *)
+        arena_alloc(&arena, sizeof(roaring_bitmap_t));
+    rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+    rb->high_low_container.allocation_size = num_containers;
+    rb->high_low_container.size = num_containers;
+    rb->high_low_container.keys = (uint16_t *)keys;
+    rb->high_low_container.typecodes = (uint8_t *)typecodes;
+    rb->high_low_container.containers =
+        (container_t **)arena_alloc(&arena,
+                                    sizeof(container_t*) * num_containers);
+    // Ensure offset of high_low_container.containers is known distance used in
+    // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the
+    // only allocation that precedes high_low_container.containers. If this is
+    // changed (new allocation or changed order), this offset will also need to
+    // be changed in the C++ wrapper.
+    assert(rb ==
+           (roaring_bitmap_t *)((char *)rb->high_low_container.containers -
+                                sizeof(roaring_bitmap_t)));
+    for (i = 0; i < num_containers; i++) {
+        switch (typecodes[i]) {
+            case BITSET_CONTAINER_TYPE: {
+                bitset_container_t *bitset = (bitset_container_t *)
+                    arena_alloc(&arena, sizeof(bitset_container_t));
+                bitset->words = bitset_zone;
+                bitset->cardinality = counts[i] + UINT32_C(1);
+                rb->high_low_container.containers[i] = bitset;
+                bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+                break;
+            }
+            case RUN_CONTAINER_TYPE: {
+                run_container_t *run = (run_container_t *)
+                    arena_alloc(&arena, sizeof(run_container_t));
+                run->capacity = counts[i];
+                run->n_runs = counts[i];
+                run->runs = run_zone;
+                rb->high_low_container.containers[i] = run;
+                run_zone += run->n_runs;
+                break;
+            }
+            case ARRAY_CONTAINER_TYPE: {
+                array_container_t *array = (array_container_t *)
+                    arena_alloc(&arena, sizeof(array_container_t));
+                array->capacity = counts[i] + UINT32_C(1);
+                array->cardinality = counts[i] + UINT32_C(1);
+                array->array = array_zone;
+                rb->high_low_container.containers[i] = array;
+                array_zone += counts[i] + UINT32_C(1);
+                break;
+            }
+            default:
+                // `arena` has been advanced past the start of the allocation
+                // by the arena_alloc() calls above; `rb` is the first chunk
+                // handed out and therefore the pointer that must be freed.
+                roaring_free(rb);
+                return NULL;
+        }
+    }
+
+    return rb;
+}
+
+/**
+ * Build a frozen bitmap on top of a buffer in the portable serialization
+ * format. Container payloads are not copied: the created containers point
+ * into `buf`, while keys, typecodes and descriptors live in one allocation.
+ *
+ * No buffer length is passed in, so reads cannot be bounds-checked here; the
+ * caller has to make sure `buf` holds a complete, well-formed image.
+ *
+ * @param buf start of the serialized bitmap
+ * @return the new bitmap (flagged ROARING_FLAG_FROZEN), or NULL when the
+ *         cookie is not recognised, the stored container count is outside
+ *         [0, 65536], or the allocation fails.
+ */
+ALLOW_UNALIGNED
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
+    char *start_of_buf = (char *) buf;
+    uint32_t cookie;
+    int32_t num_containers;
+    uint16_t *descriptive_headers;
+    uint32_t *offset_headers = NULL;
+    const char *run_flag_bitset = NULL;
+    bool hasrun = false;
+
+    // deserialize cookie
+    memcpy(&cookie, buf, sizeof(uint32_t));
+    buf += sizeof(uint32_t);
+    if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
+        memcpy(&num_containers, buf, sizeof(int32_t));
+        buf += sizeof(int32_t);
+        // The count comes straight from the input. A negative or oversized
+        // value would make the size computations below wrap around and
+        // under-allocate the arena, so reject it up front.
+        if (num_containers < 0 || num_containers > (1 << 16)) {
+            return NULL;
+        }
+        descriptive_headers = (uint16_t *) buf;
+        buf += num_containers * 2 * sizeof(uint16_t);
+        offset_headers = (uint32_t *) buf;
+        buf += num_containers * sizeof(uint32_t);
+    } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
+        // The count is packed in the upper 16 bits, stored minus one.
+        num_containers = (cookie >> 16) + 1;
+        hasrun = true;
+        int32_t run_flag_bitset_size = (num_containers + 7) / 8;
+        run_flag_bitset = buf;
+        buf += run_flag_bitset_size;
+        descriptive_headers = (uint16_t *) buf;
+        buf += num_containers * 2 * sizeof(uint16_t);
+        // Offsets are only present once the bitmap is large enough.
+        if(num_containers >= NO_OFFSET_THRESHOLD) {
+            offset_headers = (uint32_t *) buf;
+            buf += num_containers * sizeof(uint32_t);
+        }
+    } else {
+        return NULL;
+    }
+
+    // calculate total size for allocation
+    int32_t num_bitset_containers = 0;
+    int32_t num_run_containers = 0;
+    int32_t num_array_containers = 0;
+
+    int32_t i;
+    for (i = 0; i < num_containers; i++) {
+        uint16_t tmp;
+        memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+        uint32_t cardinality = tmp + 1;
+        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+        bool isrun = false;
+        if(hasrun) {
+            if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+                isbitmap = false;
+                isrun = true;
+            }
+        }
+
+        if (isbitmap) {
+            num_bitset_containers++;
+        } else if (isrun) {
+            num_run_containers++;
+        } else {
+            num_array_containers++;
+        }
+    }
+
+    size_t alloc_size = 0;
+    alloc_size += sizeof(roaring_bitmap_t);
+    alloc_size += num_containers * sizeof(container_t*);
+    alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+    alloc_size += num_run_containers * sizeof(run_container_t);
+    alloc_size += num_array_containers * sizeof(array_container_t);
+    alloc_size += num_containers * sizeof(uint16_t); // keys
+    alloc_size += num_containers * sizeof(uint8_t); // typecodes
+
+    // allocate bitmap and construct containers
+    char *arena = (char *)roaring_malloc(alloc_size);
+    if (arena == NULL) {
+        return NULL;
+    }
+
+    roaring_bitmap_t *rb = (roaring_bitmap_t *)
+        arena_alloc(&arena, sizeof(roaring_bitmap_t));
+    rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+    rb->high_low_container.allocation_size = num_containers;
+    rb->high_low_container.size = num_containers;
+    rb->high_low_container.containers =
+        (container_t **)arena_alloc(&arena,
+                                    sizeof(container_t*) * num_containers);
+
+    uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
+    uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
+
+    rb->high_low_container.keys = keys;
+    rb->high_low_container.typecodes = typecodes;
+
+    for (i = 0; i < num_containers; i++) {
+        uint16_t tmp;
+        memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+        int32_t cardinality = tmp + 1;
+        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+        bool isrun = false;
+        if(hasrun) {
+            if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+                isbitmap = false;
+                isrun = true;
+            }
+        }
+
+        keys[i] = descriptive_headers[2*i];
+
+        if (isbitmap) {
+            typecodes[i] = BITSET_CONTAINER_TYPE;
+            bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t));
+            c->cardinality = cardinality;
+            if(offset_headers != NULL) {
+                c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
+            } else {
+                c->words = (uint64_t *) buf;
+                buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        } else if (isrun) {
+            typecodes[i] = RUN_CONTAINER_TYPE;
+            run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
+            c->capacity = cardinality;
+            // A run payload starts with its 16-bit run count.
+            uint16_t n_runs;
+            if(offset_headers != NULL) {
+                memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t));
+                c->n_runs = n_runs;
+                c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t));
+            } else {
+                memcpy(&n_runs, buf, sizeof(uint16_t));
+                c->n_runs = n_runs;
+                buf += sizeof(uint16_t);
+                c->runs = (rle16_t *) buf;
+                buf += c->n_runs * sizeof(rle16_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        } else {
+            typecodes[i] = ARRAY_CONTAINER_TYPE;
+            array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t));
+            c->cardinality = cardinality;
+            c->capacity = cardinality;
+            if(offset_headers != NULL) {
+                c->array = (uint16_t *) (start_of_buf + offset_headers[i]);
+            } else {
+                c->array = (uint16_t *) buf;
+                buf += cardinality * sizeof(uint16_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        }
+    }
+
+    return rb;
+}
+
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring {
+#endif
+/* end file src/roaring.c */
+/* begin file src/roaring_array.c */
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+// Convention: [0,ra->size) all elements are initialized
+// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
+
+// Declarations of small roaring_array_t accessors.
+// NOTE(review): presumably these functions are defined `inline` in a header
+// and the `extern inline` declarations here make this translation unit emit
+// their external definitions -- confirm against the header.
+extern inline int32_t ra_get_size(const roaring_array_t *ra);
+extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
+
+extern inline container_t *ra_get_container_at_index(
+ const roaring_array_t *ra, uint16_t i,
+ uint8_t *typecode);
+
+extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
+ uint16_t i);
+
+extern inline void ra_replace_key_and_container_at_index(
+ roaring_array_t *ra, int32_t i, uint16_t key,
+ container_t *c, uint8_t typecode);
+
+extern inline void ra_set_container_at_index(
+ const roaring_array_t *ra, int32_t i,
+ container_t *c, uint8_t typecode);
+
+/**
+ * Resize the backing storage of `ra` so that it can hold `new_capacity`
+ * entries.
+ *
+ * The containers, keys and typecodes arrays share a single allocation (in
+ * that order), i.e. a Struct-of-Arrays layout. C's realloc() cannot be used
+ * because each array has to land at a new offset:
+ * https://github.com/RoaringBitmap/CRoaring/issues/256
+ *
+ * A capacity of 0 releases the storage. Returns false, leaving `ra` as it
+ * was, when the new block cannot be allocated; true otherwise.
+ */
+static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
+    if (new_capacity == 0) {
+        roaring_free(ra->containers);
+        ra->allocation_size = 0;
+        ra->typecodes = NULL;
+        ra->keys = NULL;
+        ra->containers = NULL;
+        return true;
+    }
+
+    const size_t memoryneeded = new_capacity * (
+        sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
+    void *bigalloc = roaring_malloc(memoryneeded);
+    if (bigalloc == NULL) {
+        return false;
+    }
+
+    // Carve the three arrays out of the fresh block.
+    void *previous_block = ra->containers;
+    container_t **fresh_containers = (container_t **)bigalloc;
+    uint16_t *fresh_keys = (uint16_t *)(fresh_containers + new_capacity);
+    uint8_t *newtypecodes = (uint8_t *)(fresh_keys + new_capacity);
+    assert((char *)(newtypecodes + new_capacity) ==
+           (char *)bigalloc + memoryneeded);
+
+    // Only the initialized prefix [0, ra->size) is carried over.
+    if (ra->size > 0) {
+        memcpy(fresh_containers, ra->containers,
+               sizeof(container_t *) * ra->size);
+        memcpy(fresh_keys, ra->keys, sizeof(uint16_t) * ra->size);
+        memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
+    }
+
+    ra->containers = fresh_containers;
+    ra->keys = fresh_keys;
+    ra->typecodes = newtypecodes;
+    ra->allocation_size = new_capacity;
+    roaring_free(previous_block);
+    return true;
+}
+
+/**
+ * Initialize `new_ra` as an empty array with room for `cap` entries.
+ *
+ * Returns false when `new_ra` is NULL, when `cap` exceeds INT32_MAX or when
+ * the allocation fails. In the last two cases `new_ra` has already been reset
+ * by ra_init(). A `cap` of 0 allocates nothing and succeeds.
+ */
+bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
+    if (new_ra == NULL) {
+        return false;
+    }
+    ra_init(new_ra);
+
+    if (cap > INT32_MAX) {
+        return false;
+    }
+    if (cap == 0) {
+        return true;
+    }
+
+    // containers, keys and typecodes share one block, in that order
+    void *block = roaring_malloc(cap *
+        (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
+    if (block == NULL) {
+        return false;
+    }
+    new_ra->containers = (container_t **)block;
+    new_ra->keys = (uint16_t *)(new_ra->containers + cap);
+    new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
+    // cap <= INT32_MAX was checked above, so the narrowing is safe
+    new_ra->allocation_size = (int32_t)cap;
+    return true;
+}
+
+/**
+ * Trim the backing storage of `ra` down to exactly `ra->size` entries.
+ * Returns the number of bytes released, or 0 if the reallocation failed.
+ */
+int ra_shrink_to_fit(roaring_array_t *ra) {
+    // bytes used by one slot across the three parallel arrays
+    const size_t slot_bytes =
+        sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t);
+    const int reclaimed = (ra->allocation_size - ra->size) * slot_bytes;
+
+    if (realloc_array(ra, ra->size)) {
+        ra->allocation_size = ra->size;
+        return reclaimed;
+    }
+    return 0;
+}
+
+/**
+ * Put `new_ra` into the empty state: no storage, size 0, flags 0.
+ * A NULL argument is ignored.
+ */
+void ra_init(roaring_array_t *new_ra) {
+    if (new_ra == NULL) {
+        return;
+    }
+
+    new_ra->size = 0;
+    new_ra->allocation_size = 0;
+    new_ra->flags = 0;
+
+    new_ra->containers = NULL;
+    new_ra->keys = NULL;
+    new_ra->typecodes = NULL;
+}
+
+/**
+ * Replace the content of `dest` with the content of `source`.
+ *
+ * The containers currently held by `dest` are freed first. With
+ * `copy_on_write` set, each source container is passed through
+ * get_copy_of_container() and the resulting pointers are shared by both
+ * arrays; otherwise every container is cloned.
+ *
+ * Returns false when `dest` cannot be grown or when a clone fails (in the
+ * latter case the clones made so far are freed and `dest` is left without
+ * storage); true otherwise.
+ */
+bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
+                  bool copy_on_write) {
+    int32_t idx;
+
+    ra_clear_containers(dest); // the old containers are about to be replaced
+    if (source->size == 0) {
+        // Returning early also avoids calling memcpy() on NULL arrays.
+        dest->size = 0; // required: the old entries were just freed
+        return true;
+    }
+    if (dest->allocation_size < source->size &&
+        !realloc_array(dest, source->size)) {
+        return false;
+    }
+
+    dest->size = source->size;
+    memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
+
+    if (!copy_on_write) {
+        // deep copy: clone each container individually
+        memcpy(dest->typecodes, source->typecodes,
+               dest->size * sizeof(uint8_t));
+        for (idx = 0; idx < dest->size; idx++) {
+            dest->containers[idx] =
+                container_clone(source->containers[idx], source->typecodes[idx]);
+            if (dest->containers[idx] == NULL) {
+                // undo the clones made so far
+                int32_t done;
+                for (done = 0; done < idx; done++) {
+                    container_free(dest->containers[done], dest->typecodes[done]);
+                }
+                ra_clear_without_containers(dest);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // copy-on-write: turn the source containers into shared containers...
+    for (idx = 0; idx < dest->size; ++idx) {
+        source->containers[idx] = get_copy_of_container(
+            source->containers[idx], &source->typecodes[idx], copy_on_write);
+    }
+    // ...then copy the pointers and typecodes over (shallow copy)
+    memcpy(dest->containers, source->containers,
+           dest->size * sizeof(container_t *));
+    memcpy(dest->typecodes, source->typecodes,
+           dest->size * sizeof(uint8_t));
+    return true;
+}
+
+/**
+ * Free every container stored in [0, ra->size). The arrays themselves and
+ * `ra->size` are left as they are.
+ */
+void ra_clear_containers(roaring_array_t *ra) {
+    int32_t idx = 0;
+    while (idx < ra->size) {
+        container_free(ra->containers[idx], ra->typecodes[idx]);
+        ++idx;
+    }
+}
+
+/**
+ * Empty the array: free all containers, set the size to 0 and shrink the
+ * backing storage accordingly.
+ */
+void ra_reset(roaring_array_t *ra) {
+    ra_clear_containers(ra);
+    ra->size = 0;
+    // the byte count returned by ra_shrink_to_fit() is not needed here
+    ra_shrink_to_fit(ra);
+}
+
+/**
+ * Release the backing storage of `ra` without freeing the containers it
+ * points to, and reset the array to the empty state.
+ */
+void ra_clear_without_containers(roaring_array_t *ra) {
+    // keys and typecodes live in the same allocation as containers,
+    // so a single free releases all three
+    roaring_free(ra->containers);
+    ra->containers = NULL;
+    ra->keys = NULL;
+    ra->typecodes = NULL;
+    ra->allocation_size = 0;
+    ra->size = 0;
+}
+
+/**
+ * Free all containers held by `ra`, then release its backing storage.
+ */
+void ra_clear(roaring_array_t *ra) {
+    ra_clear_containers(ra);         // the containers first...
+    ra_clear_without_containers(ra); // ...then the arrays
+}
+
+/**
+ * Make sure `ra` has room for `k` more entries.
+ *
+ * When growth is needed the capacity becomes twice the desired size while the
+ * array holds fewer than 1024 entries, 5/4 of it afterwards, capped at 65536.
+ * Returns false only if the reallocation fails.
+ */
+bool extend_array(roaring_array_t *ra, int32_t k) {
+    const int32_t max_containers = 65536;
+    const int32_t desired_size = ra->size + k;
+    assert(desired_size <= max_containers);
+
+    if (desired_size <= ra->allocation_size) {
+        return true; // already large enough
+    }
+
+    int32_t grown;
+    if (ra->size < 1024) {
+        grown = 2 * desired_size;
+    } else {
+        grown = 5 * desired_size / 4;
+    }
+    if (grown > max_containers) {
+        grown = max_containers;
+    }
+    return realloc_array(ra, grown);
+}
+
+/**
+ * Append the (key, container, typecode) triple at the end of `ra`.
+ * The result of extend_array() is not checked.
+ */
+void ra_append(
+    roaring_array_t *ra, uint16_t key,
+    container_t *c, uint8_t typecode
+){
+    extend_array(ra, 1);
+
+    const int32_t slot = ra->size;
+    ra->containers[slot] = c;
+    ra->typecodes[slot] = typecode;
+    ra->keys[slot] = key;
+    ra->size = slot + 1;
+}
+
+/**
+ * Append to `ra` a copy of entry `index` of `sa`.
+ *
+ * With `copy_on_write` the source container goes through
+ * get_copy_of_container() and both arrays end up holding the same pointer;
+ * otherwise the container is cloned.
+ */
+void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
+                    uint16_t index, bool copy_on_write) {
+    extend_array(ra, 1);
+    const int32_t slot = ra->size;
+
+    // the slot at `slot` holds junk that does not need freeing
+    ra->keys[slot] = sa->keys[index];
+    if (!copy_on_write) {
+        ra->containers[slot] =
+            container_clone(sa->containers[index], sa->typecodes[index]);
+    } else {
+        // the shared container will be in two bitmaps
+        sa->containers[index] = get_copy_of_container(
+            sa->containers[index], &sa->typecodes[index], copy_on_write);
+        ra->containers[slot] = sa->containers[index];
+    }
+    // read the typecode last: get_copy_of_container() receives a pointer to it
+    ra->typecodes[slot] = sa->typecodes[index];
+    ra->size++;
+}
+
+/**
+ * Append copies of the leading entries of `sa` to `ra`, stopping at the first
+ * entry whose key is greater than or equal to `stopping_key`.
+ */
+void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
+                            uint16_t stopping_key, bool copy_on_write) {
+    int32_t idx;
+    for (idx = 0; idx < sa->size && sa->keys[idx] < stopping_key; ++idx) {
+        ra_append_copy(ra, sa, idx, copy_on_write);
+    }
+}
+
+/**
+ * Append to `ra` copies of the entries [start_index, end_index) of `sa`.
+ *
+ * With `copy_on_write` each source container goes through
+ * get_copy_of_container() and is then referenced by both arrays; otherwise
+ * it is cloned.
+ */
+void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
+                          int32_t start_index, int32_t end_index,
+                          bool copy_on_write) {
+    // reserve room for the whole range up front
+    extend_array(ra, end_index - start_index);
+
+    int32_t src;
+    for (src = start_index; src < end_index; ++src) {
+        const int32_t dst = ra->size;
+        ra->keys[dst] = sa->keys[src];
+        if (!copy_on_write) {
+            ra->containers[dst] =
+                container_clone(sa->containers[src], sa->typecodes[src]);
+        } else {
+            sa->containers[src] = get_copy_of_container(
+                sa->containers[src], &sa->typecodes[src], copy_on_write);
+            ra->containers[dst] = sa->containers[src];
+        }
+        // read the typecode last: get_copy_of_container() receives a pointer to it
+        ra->typecodes[dst] = sa->typecodes[src];
+        ra->size++;
+    }
+}
+
+/**
+ * Append to `ra` copies of all entries of `sa` whose position follows the
+ * position ra_get_index() reports for `before_start`.
+ */
+void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
+                            uint16_t before_start, bool copy_on_write) {
+    const int found = ra_get_index(sa, before_start);
+    // non-negative result: start right after that index;
+    // negative result: decode the start position as -found - 1
+    const int first = (found >= 0) ? found + 1 : -found - 1;
+    ra_append_copy_range(ra, sa, first, sa->size, copy_on_write);
+}
+
+/**
+ * Append the entries [start_index, end_index) of `sa` to `ra` by copying the
+ * container pointers as-is (no clone, no sharing step). `sa` itself is not
+ * modified.
+ */
+void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
+                          int32_t start_index, int32_t end_index) {
+    extend_array(ra, end_index - start_index);
+
+    int32_t src;
+    for (src = start_index; src < end_index; ++src) {
+        const int32_t dst = ra->size;
+        ra->containers[dst] = sa->containers[src];
+        ra->typecodes[dst] = sa->typecodes[src];
+        ra->keys[dst] = sa->keys[src];
+        ra->size = dst + 1;
+    }
+}
+
+/**
+ * Append to `ra` the entries [start_index, end_index) of `sa`.
+ *
+ * With `copy_on_write` each source container goes through
+ * get_copy_of_container() and is then referenced by both arrays; otherwise
+ * it is cloned.
+ */
+void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
+                     int32_t start_index, int32_t end_index,
+                     bool copy_on_write) {
+    extend_array(ra, end_index - start_index);
+
+    int32_t src;
+    for (src = start_index; src < end_index; ++src) {
+        const int32_t dst = ra->size;
+        ra->keys[dst] = sa->keys[src];
+        if (!copy_on_write) {
+            ra->containers[dst] =
+                container_clone(sa->containers[src], sa->typecodes[src]);
+        } else {
+            sa->containers[src] = get_copy_of_container(
+                sa->containers[src], &sa->typecodes[src], copy_on_write);
+            ra->containers[dst] = sa->containers[src];
+        }
+        // read the typecode last: get_copy_of_container() receives a pointer to it
+        ra->typecodes[dst] = sa->typecodes[src];
+        ra->size++;
+    }
+}
+
+/**
+ * Look up the container stored under key `x`.
+ * On success `*typecode` receives its typecode and the container is
+ * returned; when binarySearch() reports a negative index, NULL is returned
+ * and `*typecode` is left untouched.
+ */
+container_t *ra_get_container(
+    roaring_array_t *ra, uint16_t x, uint8_t *typecode
+){
+    const int pos = binarySearch(ra->keys, (int32_t)ra->size, x);
+    if (pos >= 0) {
+        *typecode = ra->typecodes[pos];
+        return ra->containers[pos];
+    }
+    return NULL;
+}
+
+// More declarations of inline accessors; ra_get_container_at_index and
+// ra_get_index are also declared near the top of this file.
+// NOTE(review): presumably these emit the external definitions of functions
+// defined `inline` in a header -- confirm against the header.
+extern inline container_t *ra_get_container_at_index(
+ const roaring_array_t *ra, uint16_t i,
+ uint8_t *typecode);
+
+extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra,
+ uint16_t i);
+
+extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
+
+extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
+ int32_t pos);
+
+/**
+ * Starting at `pos`, skip every entry whose key is smaller than `x`, freeing
+ * the container of each skipped entry. Returns the first position that was
+ * not skipped (possibly ra->size).
+ */
+int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
+    for (; pos < ra->size && ra->keys[pos] < x; ++pos) {
+        container_free(ra->containers[pos], ra->typecodes[pos]);
+    }
+    return pos;
+}
+
+/**
+ * Insert the (key, container, typecode) triple at position `i`, shifting the
+ * entries [i, ra->size) one slot to the right.
+ */
+void ra_insert_new_key_value_at(
+    roaring_array_t *ra, int32_t i, uint16_t key,
+    container_t *c, uint8_t typecode
+){
+    extend_array(ra, 1);
+
+    // number of entries that have to move out of the way
+    const size_t tail = ra->size - i;
+    // May be an optimization opportunity with DIY memmove
+    memmove(ra->keys + i + 1, ra->keys + i, sizeof(uint16_t) * tail);
+    memmove(ra->containers + i + 1, ra->containers + i,
+            sizeof(container_t *) * tail);
+    memmove(ra->typecodes + i + 1, ra->typecodes + i,
+            sizeof(uint8_t) * tail);
+
+    ra->containers[i] = c;
+    ra->typecodes[i] = typecode;
+    ra->keys[i] = key;
+    ra->size++;
+}
+
+// note: Java routine set things to 0, enabling GC.
+// Java called it "resize" but it was always used to downsize.
+// Allowing upsize would break the conventions about
+// valid containers below ra->size.
+
+/**
+ * Set the logical size of `ra` to `new_length`, which must not exceed the
+ * current size. Nothing is freed and the storage is not shrunk.
+ */
+void ra_downsize(roaring_array_t *ra, int32_t new_length) {
+    assert(new_length <= ra->size);
+
+    ra->size = new_length;
+}
+
+/**
+ * Remove entry `i` by shifting the entries after it one slot to the left.
+ * The removed container is not freed.
+ */
+void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
+    // entries located after position i
+    const size_t tail = ra->size - i - 1;
+
+    memmove(ra->containers + i, ra->containers + i + 1,
+            sizeof(container_t *) * tail);
+    memmove(ra->keys + i, ra->keys + i + 1, sizeof(uint16_t) * tail);
+    memmove(ra->typecodes + i, ra->typecodes + i + 1,
+            sizeof(uint8_t) * tail);
+    ra->size--;
+}
+
+/**
+ * Free the container stored at position `i`, then remove the entry.
+ */
+void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
+    // free first: ra_remove_at_index() overwrites slot i
+    container_free(ra->containers[i], ra->typecodes[i]);
+    ra_remove_at_index(ra, i);
+}
+
+/**
+ * Slide the entries [begin, end) left so that they start at `new_begin`.
+ *
+ * Used in inplace andNot only, to move the containers of the mutated
+ * RoaringBitmap that come after the largest container of the argument
+ * RoaringBitmap. In use it should be followed by a call to downsize.
+ *
+ * Overwritten containers that are not copied elsewhere must have been freed
+ * by the caller beforehand.
+ */
+void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
+                   uint32_t new_begin) {
+    assert(begin <= end);
+    assert(new_begin < begin);
+
+    const int moved = end - begin;
+
+    memmove(ra->containers + new_begin, ra->containers + begin,
+            sizeof(container_t *) * moved);
+    memmove(ra->keys + new_begin, ra->keys + begin,
+            sizeof(uint16_t) * moved);
+    memmove(ra->typecodes + new_begin, ra->typecodes + begin,
+            sizeof(uint8_t) * moved);
+}
+
+/**
+ * Move the last `count` entries of `ra` by `distance` slots (positive moves
+ * them right, after growing the array if needed) and adjust the size by
+ * `distance`.
+ */
+void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
+    if (distance > 0) {
+        extend_array(ra, distance);
+    }
+
+    const int32_t from = ra->size - count;
+    const int32_t to = from + distance;
+
+    memmove(ra->keys + to, ra->keys + from, sizeof(uint16_t) * count);
+    memmove(ra->containers + to, ra->containers + from,
+            sizeof(container_t *) * count);
+    memmove(ra->typecodes + to, ra->typecodes + from,
+            sizeof(uint8_t) * count);
+    ra->size += distance;
+}
+
+
+/**
+ * Write the values of every container into `ans`, container after container.
+ * Each container's key is passed to container_to_uint32_array() shifted into
+ * the upper 16 bits.
+ */
+void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
+    size_t written = 0;
+    int32_t idx;
+    for (idx = 0; idx < ra->size; ++idx) {
+        const uint32_t high_bits = ((uint32_t)ra->keys[idx]) << 16;
+        const int produced = container_to_uint32_array(
+            ans + written, ra->containers[idx], ra->typecodes[idx],
+            high_bits);
+        written += produced;
+    }
+}
+
+/**
+ * Copy `limit` values into `ans`, starting at the `offset`-th value of the
+ * bitmap (paging).
+ *
+ * Containers that overlap the requested window are expanded in full into a
+ * scratch buffer; the window is then copied out of that buffer. The scratch
+ * buffer is zero-filled, so when fewer than `limit` values are available the
+ * remaining slots of `ans` receive 0. When no container overlaps the window,
+ * `ans` is left untouched.
+ *
+ * @return false if a scratch allocation fails, true otherwise.
+ */
+bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
+    size_t ctr = 0;        // values seen so far, over all containers visited
+    size_t dtr = 0;        // values written into the scratch buffer
+
+    size_t t_limit = 0;    // cardinality of the current container
+
+    bool first = false;    // has the first overlapping container been seen?
+    size_t first_skip = 0; // values to skip at the start of the scratch buffer
+
+    uint32_t *t_ans = NULL; // scratch buffer
+    size_t cur_len = 0;     // capacity of the scratch buffer, in values
+
+    int i;
+    for (i = 0; i < ra->size; ++i) {
+
+        const container_t *c = container_unwrap_shared(
+            ra->containers[i], &ra->typecodes[i]);
+        switch (ra->typecodes[i]) {
+            case BITSET_CONTAINER_TYPE:
+                t_limit = (const_CAST_bitset(c))->cardinality;
+                break;
+            case ARRAY_CONTAINER_TYPE:
+                t_limit = (const_CAST_array(c))->cardinality;
+                break;
+            case RUN_CONTAINER_TYPE:
+                t_limit = run_container_cardinality(const_CAST_run(c));
+                break;
+        }
+        if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
+            if (!first){
+                //first_skip = t_limit - (ctr + t_limit - offset);
+                first_skip = offset - ctr;
+                first = true;
+                t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * (first_skip + limit));
+                if(t_ans == NULL) {
+                    return false;
+                }
+                memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ;
+                cur_len = first_skip + limit;
+            }
+            // Grow the scratch buffer when this container does not fit.
+            if (dtr + t_limit > cur_len){
+                uint32_t * append_ans = (uint32_t *)roaring_malloc(sizeof(*append_ans) * (cur_len + t_limit));
+                if(append_ans == NULL) {
+                    if(t_ans != NULL) roaring_free(t_ans);
+                    return false;
+                }
+                memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
+                cur_len = cur_len + t_limit;
+                memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
+                roaring_free(t_ans);
+                t_ans = append_ans;
+            }
+            switch (ra->typecodes[i]) {
+                case BITSET_CONTAINER_TYPE:
+                    container_to_uint32_array(
+                        t_ans + dtr,
+                        const_CAST_bitset(c), ra->typecodes[i],
+                        ((uint32_t)ra->keys[i]) << 16);
+                    break;
+                case ARRAY_CONTAINER_TYPE:
+                    container_to_uint32_array(
+                        t_ans + dtr,
+                        const_CAST_array(c), ra->typecodes[i],
+                        ((uint32_t)ra->keys[i]) << 16);
+                    break;
+                case RUN_CONTAINER_TYPE:
+                    container_to_uint32_array(
+                        t_ans + dtr,
+                        const_CAST_run(c), ra->typecodes[i],
+                        ((uint32_t)ra->keys[i]) << 16);
+                    break;
+            }
+            dtr += t_limit;
+        }
+        ctr += t_limit;
+        if (dtr-first_skip >= limit) break;
+    }
+    if(t_ans != NULL) {
+        memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
+        // t_ans came from roaring_malloc(), so it has to go back through
+        // roaring_free() rather than the C library's free().
+        roaring_free(t_ans);
+    }
+    return true;
+}
+
+/**
+ * Tell whether at least one entry of `ra` is reported as a run container by
+ * get_container_type().
+ */
+bool ra_has_run_container(const roaring_array_t *ra) {
+    int32_t idx = 0;
+    while (idx < ra->size) {
+        const uint8_t kind =
+            get_container_type(ra->containers[idx], ra->typecodes[idx]);
+        if (kind == RUN_CONTAINER_TYPE) {
+            return true;
+        }
+        ++idx;
+    }
+    return false;
+}
+
+/**
+ * Size in bytes of the portable-format header for `ra`.
+ *
+ * Without run containers: 4-byte cookie, 4-byte size, 8 bytes per container.
+ * With run containers: 4-byte cookie (the size is packed into it), one bit
+ * per container rounded up to whole bytes, then 4 bytes per container for
+ * small bitmaps (offsets omitted) or 8 bytes per container otherwise.
+ */
+uint32_t ra_portable_header_size(const roaring_array_t *ra) {
+    if (!ra_has_run_container(ra)) {
+        return 4 + 4 + 8 * ra->size;
+    }
+
+    // for small bitmaps, we omit the offsets
+    const int32_t per_container = (ra->size < NO_OFFSET_THRESHOLD) ? 4 : 8;
+    return 4 + (ra->size + 7) / 8 + per_container * ra->size;
+}
+
+/**
+ * Total size in bytes of `ra` in the portable format: the header plus the
+ * size reported by container_size_in_bytes() for every container.
+ */
+size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
+    size_t total = ra_portable_header_size(ra);
+
+    int32_t idx;
+    for (idx = 0; idx < ra->size; ++idx) {
+        total += container_size_in_bytes(ra->containers[idx],
+                                         ra->typecodes[idx]);
+    }
+    return total;
+}
+
+/**
+ * Write `ra` to `buf` in the portable format and return the number of bytes
+ * written.
+ *
+ * Output order: cookie (with the container count packed in when run
+ * containers are present, followed by the run-container bitmap; otherwise
+ * followed by a separate 4-byte count), one (key, cardinality - 1) pair per
+ * container, the container offsets (omitted for small bitmaps that have run
+ * containers), then the container payloads.
+ *
+ * `buf` must be large enough; nothing is bounds-checked here.
+ *
+ * This function is endian-sensitive.
+ */
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
+    char *initbuf = buf;
+    uint32_t startOffset = 0;
+    bool hasrun = ra_has_run_container(ra);
+    int32_t k;
+    if (hasrun) {
+        // Shift as unsigned: (size - 1) can reach 65535, and shifting that
+        // left by 16 as a signed int32_t would overflow.
+        uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16);
+        memcpy(buf, &cookie, sizeof(cookie));
+        buf += sizeof(cookie);
+        uint32_t s = (ra->size + 7) / 8;
+        uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1);
+        assert(bitmapOfRunContainers != NULL); // todo: handle
+        int32_t i;
+        for (i = 0; i < ra->size; ++i) {
+            if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
+                RUN_CONTAINER_TYPE) {
+                bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
+            }
+        }
+        memcpy(buf, bitmapOfRunContainers, s);
+        buf += s;
+        roaring_free(bitmapOfRunContainers);
+        if (ra->size < NO_OFFSET_THRESHOLD) {
+            startOffset = 4 + 4 * ra->size + s;
+        } else {
+            startOffset = 4 + 8 * ra->size + s;
+        }
+    } else { // backwards compatibility
+        uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
+
+        memcpy(buf, &cookie, sizeof(cookie));
+        buf += sizeof(cookie);
+        memcpy(buf, &ra->size, sizeof(ra->size));
+        buf += sizeof(ra->size);
+
+        startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
+    }
+    for (k = 0; k < ra->size; ++k) {
+        memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
+        buf += sizeof(ra->keys[k]);
+        // get_cardinality returns a value in [1,1<<16], subtracting one
+        // we get [0,1<<16 - 1] which fits in 16 bits
+        uint16_t card = (uint16_t)(
+            container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
+        memcpy(buf, &card, sizeof(card));
+        buf += sizeof(card);
+    }
+    if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
+        // writing the containers offsets
+        for (k = 0; k < ra->size; k++) {
+            memcpy(buf, &startOffset, sizeof(startOffset));
+            buf += sizeof(startOffset);
+            startOffset =
+                startOffset +
+                container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
+        }
+    }
+    for (k = 0; k < ra->size; ++k) {
+        buf += container_write(ra->containers[k], ra->typecodes[k], buf);
+    }
+    return buf - initbuf;
+}
+
+// Quickly checks whether there is a serialized bitmap at the pointer,
+// not exceeding size "maxbytes" in bytes. This function does not allocate
+// memory dynamically.
+//
+// The check walks the header and the per-container sizes only; container
+// payloads are not inspected (apart from the run count of run containers).
+//
+// This function returns 0 if and only if no valid bitmap is found.
+// Otherwise, it returns how many bytes are occupied.
+//
+size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
+    size_t bytestotal = sizeof(int32_t);// for cookie
+    if(bytestotal > maxbytes) return 0;
+    uint32_t cookie;
+    memcpy(&cookie, buf, sizeof(int32_t));
+    buf += sizeof(uint32_t);
+    if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
+        cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
+        return 0;
+    }
+    int32_t size;
+
+    if ((cookie & 0xFFFF) == SERIAL_COOKIE)
+        size = (cookie >> 16) + 1;
+    else {
+        bytestotal += sizeof(int32_t);
+        if(bytestotal > maxbytes) return 0;
+        memcpy(&size, buf, sizeof(int32_t));
+        buf += sizeof(uint32_t);
+    }
+    // The count may come straight from the input: a negative value would
+    // wrap around in the size arithmetic below, so it is rejected here, the
+    // same way ra_portable_deserialize() rejects it.
+    if (size < 0 || size > (1<<16)) {
+        return 0; // logically impossible
+    }
+    char *bitmapOfRunContainers = NULL;
+    bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
+    if (hasrun) {
+        int32_t s = (size + 7) / 8;
+        bytestotal += s;
+        if(bytestotal > maxbytes) return 0;
+        bitmapOfRunContainers = (char *)buf;
+        buf += s;
+    }
+    bytestotal += size * 2 * sizeof(uint16_t);
+    if(bytestotal > maxbytes) return 0;
+    uint16_t *keyscards = (uint16_t *)buf;
+    buf += size * 2 * sizeof(uint16_t);
+    if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
+        // skipping the offsets
+        bytestotal += size * 4;
+        if(bytestotal > maxbytes) return 0;
+        buf += size * 4;
+    }
+    // Reading the containers
+    int32_t k;
+    for (k = 0; k < size; ++k) {
+        uint16_t tmp;
+        memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
+        uint32_t thiscard = tmp + 1;
+        bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
+        bool isrun = false;
+        if(hasrun) {
+            if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+                isbitmap = false;
+                isrun = true;
+            }
+        }
+        if (isbitmap) {
+            size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+            bytestotal += containersize;
+            if(bytestotal > maxbytes) return 0;
+            buf += containersize;
+        } else if (isrun) {
+            // the run count is only read once it is known to be in bounds
+            bytestotal += sizeof(uint16_t);
+            if(bytestotal > maxbytes) return 0;
+            uint16_t n_runs;
+            memcpy(&n_runs, buf, sizeof(uint16_t));
+            buf += sizeof(uint16_t);
+            size_t containersize = n_runs * sizeof(rle16_t);
+            bytestotal += containersize;
+            if(bytestotal > maxbytes) return 0;
+            buf += containersize;
+        } else {
+            size_t containersize = thiscard * sizeof(uint16_t);
+            bytestotal += containersize;
+            if(bytestotal > maxbytes) return 0;
+            buf += containersize;
+        }
+    }
+    return bytestotal;
+}
+
+// Populates answer from the portable serialized bitmap stored in buf (reading up to maxbytes bytes).
+// Returns false (after printing a diagnostic to stderr) if a properly serialized bitmap cannot be found.
+// If it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
+// Note that *readbytes is also written on the failure paths (it is the running total used for the bounds checks).
+// This function is endian-sensitive.
+bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
+ *readbytes = sizeof(int32_t);// for cookie; *readbytes is a running total that is checked against maxbytes before each read
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
+ return false;
+ }
+ uint32_t cookie;
+ memcpy(&cookie, buf, sizeof(int32_t)); // memcpy rather than a pointer cast, so buf needs no particular alignment
+ buf += sizeof(uint32_t);
+ if ((cookie & 0xFFFF) != SERIAL_COOKIE && // run-capable format: only the low 16 bits carry the cookie
+ cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { // run-free format: the whole 32-bit word is the cookie
+ fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
+ cookie);
+ return false;
+ }
+ int32_t size; // number of containers announced by the header
+
+ if ((cookie & 0xFFFF) == SERIAL_COOKIE)
+ size = (cookie >> 16) + 1; // run-capable format: high 16 bits hold (container count - 1)
+ else {
+ *readbytes += sizeof(int32_t); // run-free format: the container count is the next 32-bit word
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
+ return false;
+ }
+ memcpy(&size, buf, sizeof(int32_t));
+ buf += sizeof(uint32_t);
+ }
+ if (size < 0) {
+ fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
+ size);
+ return false; // logically impossible
+ }
+ if (size > (1<<16)) { // keys are 16-bit (see the uint16_t key reads below), so more than 2^16 containers is rejected
+ fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
+ size);
+ return false; // logically impossible
+ }
+ const char *bitmapOfRunContainers = NULL; // stays NULL in the run-free format
+ bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
+ if (hasrun) {
+ int32_t s = (size + 7) / 8; // one flag bit per container, rounded up to whole bytes
+ *readbytes += s;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
+ return false;
+ }
+ bitmapOfRunContainers = buf;
+ buf += s;
+ }
+ uint16_t *keyscards = (uint16_t *)buf; // interleaved 16-bit (key, cardinality - 1) pairs; only accessed through memcpy below
+
+ *readbytes += size * 2 * sizeof(uint16_t);
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
+ return false;
+ }
+ buf += size * 2 * sizeof(uint16_t);
+
+ bool is_ok = ra_init_with_capacity(answer, size); // from here on, failure paths call ra_clear(answer) before returning
+ if (!is_ok) {
+ fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
+ return false;
+ }
+
+ int32_t k; for ( k = 0; k < size; ++k) { // first pass: copy the key of every container
+ uint16_t tmp;
+ memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); // even slot of pair k = key
+ answer->keys[k] = tmp;
+ }
+ if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { // in these cases a table of 4 bytes per container follows the key/cardinality pairs
+ *readbytes += size * 4;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Ran out of bytes while reading offsets.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+
+ // skipping the offsets (their values are not used; containers are read sequentially)
+ buf += size * 4;
+ }
+ // Reading the containers
+ for ( k = 0; k < size; ++k) {
+ uint16_t tmp;
+ memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); // odd slot of pair k = cardinality - 1
+ uint32_t thiscard = tmp + 1;
+ bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); // by default the cardinality decides between bitset and array
+ bool isrun = false;
+ if(hasrun) {
+ if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { // flag bit k set: run container, overriding the cardinality-based choice
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+ if (isbitmap) {
+ // we check that the read is allowed
+ size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); // bitset payload has a fixed size
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+ bitset_container_t *c = bitset_container_create();
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++; // answer now holds one more container
+ buf += bitset_container_read(thiscard, c, buf); // advance by the value returned by the reader
+ answer->containers[k] = c;
+ answer->typecodes[k] = BITSET_CONTAINER_TYPE;
+ } else if (isrun) {
+ // we check that the read is allowed
+ *readbytes += sizeof(uint16_t); // 16-bit run count precedes the runs
+ if(*readbytes > maxbytes) {
+ fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ uint16_t n_runs;
+ memcpy(&n_runs, buf, sizeof(uint16_t)); // peek only: buf is not advanced here, run_container_read below starts at the same position
+ size_t containersize = n_runs * sizeof(rle16_t);
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Running out of bytes while reading a run container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+
+ run_container_t *c = run_container_create();
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for a run container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++; // answer now holds one more container
+ buf += run_container_read(thiscard, c, buf); // advance by the value returned by the reader
+ answer->containers[k] = c;
+ answer->typecodes[k] = RUN_CONTAINER_TYPE;
+ } else {
+ // we check that the read is allowed
+ size_t containersize = thiscard * sizeof(uint16_t); // array payload: one 16-bit value per element
+ *readbytes += containersize;
+ if(*readbytes > maxbytes) {// data is corrupted?
+ fprintf(stderr, "Running out of bytes while reading an array container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ // it is now safe to read
+ array_container_t *c =
+ array_container_create_given_capacity(thiscard);
+ if(c == NULL) {// memory allocation failure
+ fprintf(stderr, "Failed to allocate memory for an array container.\n");
+ ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
+ return false;
+ }
+ answer->size++; // answer now holds one more container
+ buf += array_container_read(thiscard, c, buf); // advance by the value returned by the reader
+ answer->containers[k] = c;
+ answer->typecodes[k] = ARRAY_CONTAINER_TYPE;
+ }
+ }
+ return true;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+/* end file src/roaring_array.c */
+/* begin file src/roaring_priority_queue.c */
+
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" { namespace roaring { namespace api {
+#endif
+
+struct roaring_pq_element_s { // one heap entry: a bitmap together with the key the heap is ordered by
+ uint64_t size; // ordering key; filled from roaring_bitmap_portable_size_in_bytes() in create_pq and roaring_bitmap_or_many_heap
+ bool is_temporary; // false for caller-supplied inputs (create_pq), true for intermediate results created during the union
+ roaring_bitmap_t *bitmap;
+};
+
+typedef struct roaring_pq_element_s roaring_pq_element_t;
+
+struct roaring_pq_s { // binary min-heap stored as an implicit tree in an array
+ roaring_pq_element_t *elements; // heap array; create_pq points it just past this header, inside the same allocation
+ uint64_t size; // number of entries currently in the heap
+};
+
+typedef struct roaring_pq_s roaring_pq_t;
+
+static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { // heap ordering predicate
+ return t1->size < t2->size; // true only when t1 is strictly smaller, so ties do not count as "less"
+}
+
+static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { // inserts a copy of *t into the min-heap; no capacity check is done here, the array must already have a free slot
+ uint64_t i = pq->size; // index of the new last slot
+ pq->elements[pq->size++] = *t;
+ while (i > 0) { // sift up: walk towards the root while the parent is larger than *t
+ uint64_t p = (i - 1) >> 1; // parent index in the implicit binary tree
+ roaring_pq_element_t ap = pq->elements[p];
+ if (!compare(t, &ap)) break; // *t is not smaller than its parent: position found
+ pq->elements[i] = ap; // move the parent down into the hole
+ i = p;
+ }
+ pq->elements[i] = *t; // drop *t into its final position
+}
+
+static void pq_free(roaring_pq_t *pq) { // releases the queue itself; the bitmaps referenced by its entries are not freed here
+ roaring_free(pq); // a single free suffices: create_pq places the element array in the same allocation as the header
+}
+
+static void percolate_down(roaring_pq_t *pq, uint32_t i) { // sift-down: restores the min-heap order for the subtree rooted at index i
+ uint32_t size = (uint32_t)pq->size; // NOTE(review): the 64-bit size is truncated to 32 bits here
+ uint32_t hsize = size >> 1; // indices >= hsize have no children
+ roaring_pq_element_t ai = pq->elements[i]; // element being moved down; written back once at the end
+ while (i < hsize) {
+ uint32_t l = (i << 1) + 1; // left child
+ uint32_t r = l + 1; // right child (may be out of range)
+ roaring_pq_element_t bestc = pq->elements[l]; // smaller child so far
+ if (r < size) {
+ if (compare(pq->elements + r, &bestc)) { // right child is strictly smaller than the left one
+ l = r;
+ bestc = pq->elements[r];
+ }
+ }
+ if (!compare(&bestc, &ai)) { // smaller child is not smaller than ai: heap order already holds
+ break;
+ }
+ pq->elements[i] = bestc; // move the smaller child up into the hole
+ i = l;
+ }
+ pq->elements[i] = ai;
+}
+
+static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { // builds a min-heap over arr[0..length-1], keyed by portable serialized size
+ size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; // header and element array share one allocation
+ roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); // NOTE(review): the result is dereferenced below without a NULL check
+ answer->elements = (roaring_pq_element_t *)(answer + 1); // element array starts right after the header
+ answer->size = length;
+ uint32_t i; for ( i = 0; i < length; i++) {
+ answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; // const is cast away; the entry is flagged non-temporary on the next line
+ answer->elements[i].is_temporary = false;
+ answer->elements[i].size =
+ roaring_bitmap_portable_size_in_bytes(arr[i]);
+ }
+ { // heapify: sift down every index from length/2 back to the root
+ int i; // signed on purpose so that the i >= 0 test can terminate; shadows the outer uint32_t i
+ for (i = (length >> 1); i >= 0; i--) { // NOTE(review): runs once with i == 0 even when length == 0, and percolate_down then reads elements[0]
+ percolate_down(answer, i);
+ }
+ }
+ return answer;
+}
+
+static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { // removes and returns the root (smallest-size entry) of the heap
+ roaring_pq_element_t ans = *pq->elements; // root is read unconditionally
+ if (pq->size > 1) {
+ pq->elements[0] = pq->elements[--pq->size]; // move the last entry to the root, then restore heap order
+ percolate_down(pq, 0);
+ } else
+ --pq->size; // NOTE(review): size is unsigned, so calling this on an empty queue (size == 0) wraps around
+ // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size;
+ return ans;
+}
+
+// Lazy union of x1 and x2. This function consumes and frees the inputs: it returns a new bitmap, except when one input has no containers, in which case that input is freed and the other one is returned as-is.
+static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
+ roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = ra_get_size(&x1->high_low_container),
+ length2 = ra_get_size(&x2->high_low_container);
+ if (0 == length1) { // x1 has no containers: the union is x2 itself
+ roaring_bitmap_free(x1);
+ return x2;
+ }
+ if (0 == length2) { // x2 has no containers: the union is x1 itself
+ roaring_bitmap_free(x2);
+ return x1;
+ }
+ uint32_t neededcap = length1 > length2 ? length2 : length1; // initial capacity is min(length1, length2)
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); // NOTE(review): answer is used below without a NULL check
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) { // merge of the two key sequences; every exit is a break when one side is exhausted
+ if (s1 == s2) {
+ // todo: unsharing can be inefficient as it may create a clone where
+ // none
+ // is needed, but it has the benefit of being easy to reason about.
+
+ ra_unshare_container_at_index(&x1->high_low_container, pos1);
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ assert(type1 != SHARED_CONTAINER_TYPE);
+
+ ra_unshare_container_at_index(&x2->high_low_container, pos2);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ assert(type2 != SHARED_CONTAINER_TYPE);
+
+ container_t *c;
+
+ if ((type2 == BITSET_CONTAINER_TYPE) && // when only c2 is a bitset, c2 is used as the first operand of the lazy OR
+ (type1 != BITSET_CONTAINER_TYPE)
+ ){
+ c = container_lazy_ior(c2, type2, c1, type1, &result_type);
+ container_free(c1, type1);
+ if (c != c2) { // the result may be the first operand itself; free c2 only if it is a different object
+ container_free(c2, type2);
+ }
+ } else {
+ c = container_lazy_ior(c1, type1, c2, type2, &result_type);
+ container_free(c2, type2);
+ if (c != c1) { // same rule with the operand roles swapped
+ container_free(c1, type1);
+ }
+ }
+ // since we assume that the initial containers are non-empty, the
+ // result here
+ // can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ ra_append(&answer->high_low_container, s1, c1, type1); // the container pointer itself is handed to answer, no copy is made here
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ ra_append(&answer->high_low_container, s2, c2, type2); // the container pointer itself is handed to answer, no copy is made here
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) { // x1 exhausted: append what is left of x2
+ ra_append_move_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2);
+ } else if (pos2 == length2) { // x2 exhausted: append what is left of x1
+ ra_append_move_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1);
+ }
+ ra_clear_without_containers(&x1->high_low_container); // every container was either freed above or handed to answer; presumably this releases only the array storage
+ ra_clear_without_containers(&x2->high_low_container);
+ roaring_free(x1);
+ roaring_free(x2);
+ return answer;
+}
+
+/**
+ * Compute the union of 'number' bitmaps using a heap keyed by portable
+ * serialized size (the two smallest are merged first). This can sometimes be
+ * faster than roaring_bitmap_or_many which uses a naive algorithm.
+ * Caller is responsible for freeing the result; the inputs in x are not freed.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) { // no inputs: the result is an empty bitmap
+ return roaring_bitmap_create();
+ }
+ if (number == 1) { // single input: the result is a copy of it
+ return roaring_bitmap_copy(x[0]);
+ }
+ roaring_pq_t *pq = create_pq(x, number);
+ while (pq->size > 1) { // each round replaces the two smallest entries by their lazy union
+ roaring_pq_element_t x1 = pq_poll(pq);
+ roaring_pq_element_t x2 = pq_poll(pq);
+
+ if (x1.is_temporary && x2.is_temporary) { // both are intermediates: merge them, consuming both
+ roaring_bitmap_t *newb =
+ lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap);
+ // should normally return a fresh new bitmap *except* that
+ // it can return x1.bitmap or x2.bitmap in degenerate cases
+ bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); // NOTE(review): false only if x1.bitmap == x2.bitmap == newb, so true whenever the two pointers differ
+ uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
+ roaring_pq_element_t newelement = {
+ .size = bsize, .is_temporary = temporary, .bitmap = newb};
+ pq_add(pq, &newelement);
+ } else if (x2.is_temporary) { // only x2 is an intermediate: OR the caller-owned x1 into it in place
+ roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false);
+ x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); // refresh the heap key after the in-place change
+ pq_add(pq, &x2);
+ } else if (x1.is_temporary) { // only x1 is an intermediate: OR the caller-owned x2 into it in place
+ roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false);
+ x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); // refresh the heap key after the in-place change
+
+ pq_add(pq, &x1);
+ } else { // both are caller-owned inputs: build a new bitmap and leave the inputs untouched
+ roaring_bitmap_t *newb =
+ roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false);
+ uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
+ roaring_pq_element_t newelement = {
+ .size = bsize, .is_temporary = true, .bitmap = newb};
+
+ pq_add(pq, &newelement);
+ }
+ }
+ roaring_pq_element_t X = pq_poll(pq); // the single remaining entry is the union
+ roaring_bitmap_t *answer = X.bitmap;
+ roaring_bitmap_repair_after_lazy(answer); // applied once, to the final result of the lazy operations
+ pq_free(pq);
+ return answer;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace api {
+#endif
+/* end file src/roaring_priority_queue.c */
+#endif
diff --git a/tests/cfgs/caches_cfg/result/teams.pcap.out b/tests/cfgs/caches_cfg/result/teams.pcap.out
index 33d92f37d..b50feef67 100644
--- a/tests/cfgs/caches_cfg/result/teams.pcap.out
+++ b/tests/cfgs/caches_cfg/result/teams.pcap.out
@@ -7,7 +7,7 @@ Confidence Unknown : 1 (flows)
Confidence Match by port : 1 (flows)
Confidence DPI (partial) : 1 (flows)
Confidence DPI : 80 (flows)
-Num dissector calls: 534 (6.43 diss/flow)
+Num dissector calls: 535 (6.45 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/caches_global/result/lru_ipv6_caches.pcapng.out b/tests/cfgs/caches_global/result/lru_ipv6_caches.pcapng.out
index 20839c32c..c2f44ace6 100644
--- a/tests/cfgs/caches_global/result/lru_ipv6_caches.pcapng.out
+++ b/tests/cfgs/caches_global/result/lru_ipv6_caches.pcapng.out
@@ -2,7 +2,7 @@ DPI Packets (TCP): 9 (3.00 pkts/flow)
DPI Packets (UDP): 30 (3.33 pkts/flow)
Confidence DPI (cache) : 4 (flows)
Confidence DPI : 8 (flows)
-Num dissector calls: 649 (54.08 diss/flow)
+Num dissector calls: 651 (54.25 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 25/7/2 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/caches_global/result/teams.pcap.out b/tests/cfgs/caches_global/result/teams.pcap.out
index f14a35136..ca501429d 100644
--- a/tests/cfgs/caches_global/result/teams.pcap.out
+++ b/tests/cfgs/caches_global/result/teams.pcap.out
@@ -7,7 +7,7 @@ Confidence Unknown : 1 (flows)
Confidence Match by port : 1 (flows)
Confidence DPI (partial) : 5 (flows)
Confidence DPI : 76 (flows)
-Num dissector calls: 534 (6.43 diss/flow)
+Num dissector calls: 535 (6.45 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/caches_global/result/zoom_p2p.pcapng.out b/tests/cfgs/caches_global/result/zoom_p2p.pcapng.out
index 9c6eec15a..7d9ba148a 100644
--- a/tests/cfgs/caches_global/result/zoom_p2p.pcapng.out
+++ b/tests/cfgs/caches_global/result/zoom_p2p.pcapng.out
@@ -4,7 +4,7 @@ DPI Packets (UDP): 49 (4.90 pkts/flow)
DPI Packets (other): 2 (1.00 pkts/flow)
Confidence DPI (partial cache): 4 (flows)
Confidence DPI : 8 (flows)
-Num dissector calls: 845 (70.42 diss/flow)
+Num dissector calls: 849 (70.75 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/pcap/lol_wild_rift_udp.pcap b/tests/cfgs/default/pcap/lol_wild_rift_udp.pcap
new file mode 100644
index 000000000..32a7e2b18
--- /dev/null
+++ b/tests/cfgs/default/pcap/lol_wild_rift_udp.pcap
Binary files differ
diff --git a/tests/cfgs/default/pcap/tencent_games.pcap b/tests/cfgs/default/pcap/tencent_games.pcap
index 9db3bd1c0..96f5ad3fd 100644
--- a/tests/cfgs/default/pcap/tencent_games.pcap
+++ b/tests/cfgs/default/pcap/tencent_games.pcap
Binary files differ
diff --git a/tests/cfgs/default/result/1kxun.pcap.out b/tests/cfgs/default/result/1kxun.pcap.out
index 4c4aea1f8..f79239392 100644
--- a/tests/cfgs/default/result/1kxun.pcap.out
+++ b/tests/cfgs/default/result/1kxun.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 120 (1.21 pkts/flow)
Confidence Unknown : 14 (flows)
Confidence Match by port : 6 (flows)
Confidence DPI : 177 (flows)
-Num dissector calls: 5038 (25.57 diss/flow)
+Num dissector calls: 5052 (25.64 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/60/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/4in4tunnel.pcap.out b/tests/cfgs/default/result/4in4tunnel.pcap.out
index aa7e387d0..6a4177669 100644
--- a/tests/cfgs/default/result/4in4tunnel.pcap.out
+++ b/tests/cfgs/default/result/4in4tunnel.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 5 (5.00 pkts/flow)
Confidence Unknown : 1 (flows)
-Num dissector calls: 194 (194.00 diss/flow)
+Num dissector calls: 195 (195.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/6in6tunnel.pcap.out b/tests/cfgs/default/result/6in6tunnel.pcap.out
index b11cfdd4f..ba4e512b5 100644
--- a/tests/cfgs/default/result/6in6tunnel.pcap.out
+++ b/tests/cfgs/default/result/6in6tunnel.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence Unknown : 1 (flows)
-Num dissector calls: 150 (150.00 diss/flow)
+Num dissector calls: 151 (151.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/EAQ.pcap.out b/tests/cfgs/default/result/EAQ.pcap.out
index f32db376c..46579c064 100644
--- a/tests/cfgs/default/result/EAQ.pcap.out
+++ b/tests/cfgs/default/result/EAQ.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 12 (6.00 pkts/flow)
DPI Packets (UDP): 116 (4.00 pkts/flow)
Confidence DPI : 31 (flows)
-Num dissector calls: 5039 (162.55 diss/flow)
+Num dissector calls: 5068 (163.48 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/FAX-Call-t38-CA-TDM-SIP-FB-1.pcap.out b/tests/cfgs/default/result/FAX-Call-t38-CA-TDM-SIP-FB-1.pcap.out
index 391619796..09ad25063 100644
--- a/tests/cfgs/default/result/FAX-Call-t38-CA-TDM-SIP-FB-1.pcap.out
+++ b/tests/cfgs/default/result/FAX-Call-t38-CA-TDM-SIP-FB-1.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 7 (1.40 pkts/flow)
Confidence DPI : 5 (flows)
-Num dissector calls: 159 (31.80 diss/flow)
+Num dissector calls: 160 (32.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/KakaoTalk_talk.pcap.out b/tests/cfgs/default/result/KakaoTalk_talk.pcap.out
index 15ce36f40..f237b0e42 100644
--- a/tests/cfgs/default/result/KakaoTalk_talk.pcap.out
+++ b/tests/cfgs/default/result/KakaoTalk_talk.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 10 (2.00 pkts/flow)
Confidence Match by port : 8 (flows)
Confidence DPI : 11 (flows)
Confidence Match by IP : 1 (flows)
-Num dissector calls: 1226 (61.30 diss/flow)
+Num dissector calls: 1228 (61.40 diss/flow)
LRU cache ookla: 0/2/0 (insert/search/found)
LRU cache bittorrent: 0/27/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/anyconnect-vpn.pcap.out b/tests/cfgs/default/result/anyconnect-vpn.pcap.out
index 59e3a1705..b57176fa4 100644
--- a/tests/cfgs/default/result/anyconnect-vpn.pcap.out
+++ b/tests/cfgs/default/result/anyconnect-vpn.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 10 (1.00 pkts/flow)
Confidence Unknown : 2 (flows)
Confidence Match by port : 6 (flows)
Confidence DPI : 61 (flows)
-Num dissector calls: 864 (12.52 diss/flow)
+Num dissector calls: 865 (12.54 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/24/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/collectd.pcap.out b/tests/cfgs/default/result/collectd.pcap.out
index 6af69d3cd..7bf63a59d 100644
--- a/tests/cfgs/default/result/collectd.pcap.out
+++ b/tests/cfgs/default/result/collectd.pcap.out
@@ -3,7 +3,7 @@ Guessed flow protos: 3
DPI Packets (UDP): 13 (1.62 pkts/flow)
Confidence Match by port : 3 (flows)
Confidence DPI : 5 (flows)
-Num dissector calls: 476 (59.50 diss/flow)
+Num dissector calls: 479 (59.88 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/custom_rules_ipv6.pcapng.out b/tests/cfgs/default/result/custom_rules_ipv6.pcapng.out
index 71c9aa481..3fe182fb1 100644
--- a/tests/cfgs/default/result/custom_rules_ipv6.pcapng.out
+++ b/tests/cfgs/default/result/custom_rules_ipv6.pcapng.out
@@ -1,7 +1,7 @@
DPI Packets (UDP): 7 (1.00 pkts/flow)
Confidence Unknown : 1 (flows)
Confidence Match by custom rule: 6 (flows)
-Num dissector calls: 134 (19.14 diss/flow)
+Num dissector calls: 135 (19.29 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/custom_rules_same-ip_multiple_ports.pcapng.out b/tests/cfgs/default/result/custom_rules_same-ip_multiple_ports.pcapng.out
index 9c9573a0d..3c7b385f6 100644
--- a/tests/cfgs/default/result/custom_rules_same-ip_multiple_ports.pcapng.out
+++ b/tests/cfgs/default/result/custom_rules_same-ip_multiple_ports.pcapng.out
@@ -27,6 +27,6 @@ CustomProtocolC 3 222 1
Acceptable 8 592 3
- 1 TCP 192.168.1.245:56866 -> 3.3.3.3:443 [proto: 91.413/TLS.CustomProtocolA][IP: 413/CustomProtocolA][Encrypted][Confidence: Match by custom rule][DPI packets: 1][cat: Web/5][3 pkts/222 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][3.05 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 1 TCP 192.168.1.245:56866 -> 3.3.3.3:443 [proto: 91.414/TLS.CustomProtocolA][IP: 414/CustomProtocolA][Encrypted][Confidence: Match by custom rule][DPI packets: 1][cat: Web/5][3 pkts/222 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][3.05 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2 TCP 192.168.1.245:58288 -> 3.3.3.3:446 [proto: 800/CustomProtocolC][IP: 800/CustomProtocolC][ClearText][Confidence: Match by custom rule][DPI packets: 1][3 pkts/222 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][3.04 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
- 3 TCP 192.168.1.245:59682 -> 3.3.3.3:444 [proto: 414/CustomProtocolB][IP: 414/CustomProtocolB][ClearText][Confidence: Match by custom rule][DPI packets: 1][2 pkts/148 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][1.02 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 3 TCP 192.168.1.245:59682 -> 3.3.3.3:444 [proto: 415/CustomProtocolB][IP: 415/CustomProtocolB][ClearText][Confidence: Match by custom rule][DPI packets: 1][2 pkts/148 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][1.02 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
diff --git a/tests/cfgs/default/result/dhcp-fuzz.pcapng.out b/tests/cfgs/default/result/dhcp-fuzz.pcapng.out
index f994efd93..cfabeb66b 100644
--- a/tests/cfgs/default/result/dhcp-fuzz.pcapng.out
+++ b/tests/cfgs/default/result/dhcp-fuzz.pcapng.out
@@ -2,7 +2,7 @@ Guessed flow protos: 1
DPI Packets (UDP): 1 (1.00 pkts/flow)
Confidence Match by port : 1 (flows)
-Num dissector calls: 138 (138.00 diss/flow)
+Num dissector calls: 139 (139.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/discord.pcap.out b/tests/cfgs/default/result/discord.pcap.out
index a8b3aa810..3f6f8cade 100644
--- a/tests/cfgs/default/result/discord.pcap.out
+++ b/tests/cfgs/default/result/discord.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 5 (5.00 pkts/flow)
DPI Packets (UDP): 60 (1.82 pkts/flow)
Confidence DPI : 34 (flows)
-Num dissector calls: 4696 (138.12 diss/flow)
+Num dissector calls: 4723 (138.91 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/discord_mid_flow.pcap.out b/tests/cfgs/default/result/discord_mid_flow.pcap.out
index 97cd69901..ea75d405b 100644
--- a/tests/cfgs/default/result/discord_mid_flow.pcap.out
+++ b/tests/cfgs/default/result/discord_mid_flow.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 3 (3.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 169 (169.00 diss/flow)
+Num dissector calls: 170 (170.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/dnscrypt-v1-and-resolver-pings.pcap.out b/tests/cfgs/default/result/dnscrypt-v1-and-resolver-pings.pcap.out
index 2995f1fe8..6edbc98de 100644
--- a/tests/cfgs/default/result/dnscrypt-v1-and-resolver-pings.pcap.out
+++ b/tests/cfgs/default/result/dnscrypt-v1-and-resolver-pings.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 256 (1.04 pkts/flow)
Confidence DPI : 245 (flows)
-Num dissector calls: 20374 (83.16 diss/flow)
+Num dissector calls: 20385 (83.20 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/513/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/dnscrypt-v2.pcap.out b/tests/cfgs/default/result/dnscrypt-v2.pcap.out
index e8ae5b3b0..ec0c93c9e 100644
--- a/tests/cfgs/default/result/dnscrypt-v2.pcap.out
+++ b/tests/cfgs/default/result/dnscrypt-v2.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 6 (2.00 pkts/flow)
Confidence DPI : 3 (flows)
-Num dissector calls: 450 (150.00 diss/flow)
+Num dissector calls: 453 (151.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/dnscrypt_skype_false_positive.pcapng.out b/tests/cfgs/default/result/dnscrypt_skype_false_positive.pcapng.out
index 300fa36e7..61226f550 100644
--- a/tests/cfgs/default/result/dnscrypt_skype_false_positive.pcapng.out
+++ b/tests/cfgs/default/result/dnscrypt_skype_false_positive.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 150 (150.00 diss/flow)
+Num dissector calls: 151 (151.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/epicgames.pcapng.out b/tests/cfgs/default/result/epicgames.pcapng.out
index 667ec7d5c..d905f780e 100644
--- a/tests/cfgs/default/result/epicgames.pcapng.out
+++ b/tests/cfgs/default/result/epicgames.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 12 (3.00 pkts/flow)
Confidence DPI : 4 (flows)
-Num dissector calls: 686 (171.50 diss/flow)
+Num dissector calls: 690 (172.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/fuzz-2006-06-26-2594.pcap.out b/tests/cfgs/default/result/fuzz-2006-06-26-2594.pcap.out
index 9fba2f99b..1663cc972 100644
--- a/tests/cfgs/default/result/fuzz-2006-06-26-2594.pcap.out
+++ b/tests/cfgs/default/result/fuzz-2006-06-26-2594.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 5 (1.00 pkts/flow)
Confidence Unknown : 34 (flows)
Confidence Match by port : 27 (flows)
Confidence DPI : 190 (flows)
-Num dissector calls: 7461 (29.73 diss/flow)
+Num dissector calls: 7499 (29.88 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/189/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/fuzz-2020-02-16-11740.pcap.out b/tests/cfgs/default/result/fuzz-2020-02-16-11740.pcap.out
index 67274835f..f788070b4 100644
--- a/tests/cfgs/default/result/fuzz-2020-02-16-11740.pcap.out
+++ b/tests/cfgs/default/result/fuzz-2020-02-16-11740.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (other): 7 (1.00 pkts/flow)
Confidence Unknown : 19 (flows)
Confidence Match by port : 3 (flows)
Confidence DPI : 55 (flows)
-Num dissector calls: 2269 (29.47 diss/flow)
+Num dissector calls: 2285 (29.68 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/66/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/gnutella.pcap.out b/tests/cfgs/default/result/gnutella.pcap.out
index 8d5541951..8e1a0f92c 100644
--- a/tests/cfgs/default/result/gnutella.pcap.out
+++ b/tests/cfgs/default/result/gnutella.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 10 (1.00 pkts/flow)
Confidence Unknown : 389 (flows)
Confidence Match by port : 1 (flows)
Confidence DPI : 370 (flows)
-Num dissector calls: 50206 (66.06 diss/flow)
+Num dissector calls: 50515 (66.47 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/1170/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/gtp_false_positive.pcapng.out b/tests/cfgs/default/result/gtp_false_positive.pcapng.out
index 55e3a8ea4..10ce43713 100644
--- a/tests/cfgs/default/result/gtp_false_positive.pcapng.out
+++ b/tests/cfgs/default/result/gtp_false_positive.pcapng.out
@@ -3,7 +3,7 @@ Guessed flow protos: 2
DPI Packets (UDP): 7 (2.33 pkts/flow)
Confidence Unknown : 1 (flows)
Confidence Match by port : 2 (flows)
-Num dissector calls: 472 (157.33 diss/flow)
+Num dissector calls: 475 (158.33 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/h323.pcap.out b/tests/cfgs/default/result/h323.pcap.out
index 03fae4dd6..0a788dd94 100644
--- a/tests/cfgs/default/result/h323.pcap.out
+++ b/tests/cfgs/default/result/h323.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 1 (1.00 pkts/flow)
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 140 (70.00 diss/flow)
+Num dissector calls: 141 (70.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/http_ipv6.pcap.out b/tests/cfgs/default/result/http_ipv6.pcap.out
index 971aa5493..ca3b5befc 100644
--- a/tests/cfgs/default/result/http_ipv6.pcap.out
+++ b/tests/cfgs/default/result/http_ipv6.pcap.out
@@ -4,7 +4,7 @@ DPI Packets (TCP): 77 (5.92 pkts/flow)
DPI Packets (UDP): 4 (2.00 pkts/flow)
Confidence Match by port : 7 (flows)
Confidence DPI : 8 (flows)
-Num dissector calls: 168 (11.20 diss/flow)
+Num dissector calls: 169 (11.27 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/21/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/imo.pcap.out b/tests/cfgs/default/result/imo.pcap.out
index b09890d35..48ad099ab 100644
--- a/tests/cfgs/default/result/imo.pcap.out
+++ b/tests/cfgs/default/result/imo.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 7 (3.50 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 337 (168.50 diss/flow)
+Num dissector calls: 339 (169.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/instagram.pcap.out b/tests/cfgs/default/result/instagram.pcap.out
index afdcc128a..71547c61c 100644
--- a/tests/cfgs/default/result/instagram.pcap.out
+++ b/tests/cfgs/default/result/instagram.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 1 (1.00 pkts/flow)
Confidence Unknown : 1 (flows)
Confidence Match by port : 7 (flows)
Confidence DPI : 30 (flows)
-Num dissector calls: 1400 (36.84 diss/flow)
+Num dissector calls: 1401 (36.87 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/24/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/iphone.pcap.out b/tests/cfgs/default/result/iphone.pcap.out
index 4760fb758..6f18fddf7 100644
--- a/tests/cfgs/default/result/iphone.pcap.out
+++ b/tests/cfgs/default/result/iphone.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 55 (1.77 pkts/flow)
DPI Packets (other): 5 (1.00 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 50 (flows)
-Num dissector calls: 366 (7.18 diss/flow)
+Num dissector calls: 367 (7.20 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/ipv6_in_gtp.pcap.out b/tests/cfgs/default/result/ipv6_in_gtp.pcap.out
index 2658f9dab..b033d9622 100644
--- a/tests/cfgs/default/result/ipv6_in_gtp.pcap.out
+++ b/tests/cfgs/default/result/ipv6_in_gtp.pcap.out
@@ -2,7 +2,7 @@ DPI Packets (UDP): 1 (1.00 pkts/flow)
DPI Packets (other): 1 (1.00 pkts/flow)
Confidence Unknown : 1 (flows)
Confidence DPI : 1 (flows)
-Num dissector calls: 135 (67.50 diss/flow)
+Num dissector calls: 136 (68.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/kontiki.pcap.out b/tests/cfgs/default/result/kontiki.pcap.out
index 1b46e8b0b..ef400fd57 100644
--- a/tests/cfgs/default/result/kontiki.pcap.out
+++ b/tests/cfgs/default/result/kontiki.pcap.out
@@ -2,7 +2,7 @@ DPI Packets (UDP): 6 (1.50 pkts/flow)
DPI Packets (other): 4 (1.00 pkts/flow)
Confidence Unknown : 2 (flows)
Confidence DPI : 6 (flows)
-Num dissector calls: 371 (46.38 diss/flow)
+Num dissector calls: 373 (46.62 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/linecall_falsepositve.pcap.out b/tests/cfgs/default/result/linecall_falsepositve.pcap.out
index 57348d084..877cd128d 100644
--- a/tests/cfgs/default/result/linecall_falsepositve.pcap.out
+++ b/tests/cfgs/default/result/linecall_falsepositve.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 12 (12.00 pkts/flow)
Confidence Unknown : 1 (flows)
-Num dissector calls: 211 (211.00 diss/flow)
+Num dissector calls: 212 (212.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/lol_wild_rift_udp.pcap.out b/tests/cfgs/default/result/lol_wild_rift_udp.pcap.out
new file mode 100644
index 000000000..02fb2df60
--- /dev/null
+++ b/tests/cfgs/default/result/lol_wild_rift_udp.pcap.out
@@ -0,0 +1,32 @@
+DPI Packets (UDP): 8 (1.60 pkts/flow)
+Confidence DPI : 5 (flows)
+Num dissector calls: 749 (149.80 diss/flow)
+LRU cache ookla: 0/0/0 (insert/search/found)
+LRU cache bittorrent: 0/0/0 (insert/search/found)
+LRU cache zoom: 0/0/0 (insert/search/found)
+LRU cache stun: 0/0/0 (insert/search/found)
+LRU cache tls_cert: 0/0/0 (insert/search/found)
+LRU cache mining: 0/0/0 (insert/search/found)
+LRU cache msteams: 0/0/0 (insert/search/found)
+LRU cache stun_zoom: 0/0/0 (insert/search/found)
+Automa host: 0/0 (search/found)
+Automa domain: 0/0 (search/found)
+Automa tls cert: 0/0 (search/found)
+Automa risk mask: 0/0 (search/found)
+Automa common alpns: 0/0 (search/found)
+Patricia risk mask: 4/0 (search/found)
+Patricia risk mask IPv6: 0/0 (search/found)
+Patricia risk: 0/0 (search/found)
+Patricia risk IPv6: 0/0 (search/found)
+Patricia protocols: 5/5 (search/found)
+Patricia protocols IPv6: 0/0 (search/found)
+
+LoLWildRift 8 1552 5
+
+Fun 8 1552 5
+
+ 1 UDP 10.215.173.1:43686 <-> 13.51.213.131:15007 [proto: 407/LoLWildRift][IP: 265/AmazonAWS][ClearText][Confidence: DPI][DPI packets: 2][cat: Game/8][1 pkts/97 bytes <-> 1 pkts/387 bytes][Goodput ratio: 70/93][0.03 sec][Plen Bins: 0,0,50,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 2 UDP 10.215.173.1:46702 <-> 51.20.230.207:15007 [proto: 407/LoLWildRift][IP: 265/AmazonAWS][ClearText][Confidence: DPI][DPI packets: 2][cat: Game/8][1 pkts/97 bytes <-> 1 pkts/387 bytes][Goodput ratio: 70/93][0.08 sec][Plen Bins: 0,0,50,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 3 UDP 10.215.173.1:47462 <-> 51.20.230.207:15004 [proto: 407/LoLWildRift][IP: 265/AmazonAWS][ClearText][Confidence: DPI][DPI packets: 2][cat: Game/8][1 pkts/97 bytes <-> 1 pkts/387 bytes][Goodput ratio: 70/93][0.09 sec][Plen Bins: 0,0,50,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 4 UDP 10.215.173.1:41440 -> 13.53.58.18:18001 [proto: 407/LoLWildRift][IP: 265/AmazonAWS][ClearText][Confidence: DPI][DPI packets: 1][cat: Game/8][1 pkts/50 bytes -> 0 pkts/0 bytes][Goodput ratio: 43/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 5 UDP 10.215.173.1:44513 -> 13.53.58.18:18001 [proto: 407/LoLWildRift][IP: 265/AmazonAWS][ClearText][Confidence: DPI][DPI packets: 1][cat: Game/8][1 pkts/50 bytes -> 0 pkts/0 bytes][Goodput ratio: 43/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
diff --git a/tests/cfgs/default/result/lru_ipv6_caches.pcapng.out b/tests/cfgs/default/result/lru_ipv6_caches.pcapng.out
index 20839c32c..c2f44ace6 100644
--- a/tests/cfgs/default/result/lru_ipv6_caches.pcapng.out
+++ b/tests/cfgs/default/result/lru_ipv6_caches.pcapng.out
@@ -2,7 +2,7 @@ DPI Packets (TCP): 9 (3.00 pkts/flow)
DPI Packets (UDP): 30 (3.33 pkts/flow)
Confidence DPI (cache) : 4 (flows)
Confidence DPI : 8 (flows)
-Num dissector calls: 649 (54.08 diss/flow)
+Num dissector calls: 651 (54.25 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 25/7/2 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/mullvad_wireguard.pcap.out b/tests/cfgs/default/result/mullvad_wireguard.pcap.out
index 9649cd90a..cef708053 100644
--- a/tests/cfgs/default/result/mullvad_wireguard.pcap.out
+++ b/tests/cfgs/default/result/mullvad_wireguard.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 3 (3.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 156 (156.00 diss/flow)
+Num dissector calls: 157 (157.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/mumble.pcapng.out b/tests/cfgs/default/result/mumble.pcapng.out
index aec852eef..585cd446e 100644
--- a/tests/cfgs/default/result/mumble.pcapng.out
+++ b/tests/cfgs/default/result/mumble.pcapng.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 6 (6.00 pkts/flow)
DPI Packets (UDP): 4 (2.00 pkts/flow)
Confidence DPI : 3 (flows)
-Num dissector calls: 311 (103.67 diss/flow)
+Num dissector calls: 313 (104.33 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/nintendo.pcap.out b/tests/cfgs/default/result/nintendo.pcap.out
index f7a52af08..8bae80d12 100644
--- a/tests/cfgs/default/result/nintendo.pcap.out
+++ b/tests/cfgs/default/result/nintendo.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 2 (1.00 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 15 (flows)
Confidence Match by IP : 5 (flows)
-Num dissector calls: 1347 (64.14 diss/flow)
+Num dissector calls: 1352 (64.38 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/18/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/openvpn-tlscrypt.pcap.out b/tests/cfgs/default/result/openvpn-tlscrypt.pcap.out
index 838b2b60e..e62398333 100644
--- a/tests/cfgs/default/result/openvpn-tlscrypt.pcap.out
+++ b/tests/cfgs/default/result/openvpn-tlscrypt.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 4 (4.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 168 (168.00 diss/flow)
+Num dissector calls: 169 (169.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/openvpn.pcap.out b/tests/cfgs/default/result/openvpn.pcap.out
index 13c05add1..be89ec748 100644
--- a/tests/cfgs/default/result/openvpn.pcap.out
+++ b/tests/cfgs/default/result/openvpn.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 24 (8.00 pkts/flow)
DPI Packets (UDP): 15 (3.00 pkts/flow)
Confidence DPI : 8 (flows)
-Num dissector calls: 1338 (167.25 diss/flow)
+Num dissector calls: 1343 (167.88 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/openvpn_nohmac.pcapng.out b/tests/cfgs/default/result/openvpn_nohmac.pcapng.out
index e0bbe8310..0ffe702b5 100644
--- a/tests/cfgs/default/result/openvpn_nohmac.pcapng.out
+++ b/tests/cfgs/default/result/openvpn_nohmac.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 139 (139.00 diss/flow)
+Num dissector calls: 140 (140.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_1.pcapng.out b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_1.pcapng.out
index c19f2d66c..d9d7bc56b 100644
--- a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_1.pcapng.out
+++ b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_1.pcapng.out
@@ -2,7 +2,7 @@ DPI Packets (TCP): 8 (1.33 pkts/flow)
DPI Packets (UDP): 9 (2.25 pkts/flow)
Confidence Unknown : 1 (flows)
Confidence DPI : 9 (flows)
-Num dissector calls: 805 (80.50 diss/flow)
+Num dissector calls: 808 (80.80 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_2.pcapng.out b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_2.pcapng.out
index 7216e23cd..b2c1414e4 100644
--- a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_2.pcapng.out
+++ b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_2.pcapng.out
@@ -4,7 +4,7 @@ DPI Packets (TCP): 38 (6.33 pkts/flow)
DPI Packets (UDP): 4 (2.00 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 7 (flows)
-Num dissector calls: 1004 (125.50 diss/flow)
+Num dissector calls: 1006 (125.75 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_4.pcapng.out b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_4.pcapng.out
index c7683b8ad..5806cdf90 100644
--- a/tests/cfgs/default/result/ossfuzz_seed_fake_traces_4.pcapng.out
+++ b/tests/cfgs/default/result/ossfuzz_seed_fake_traces_4.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence Unknown : 1 (flows)
-Num dissector calls: 151 (151.00 diss/flow)
+Num dissector calls: 152 (152.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/pps.pcap.out b/tests/cfgs/default/result/pps.pcap.out
index fea7c48de..109fb46d7 100644
--- a/tests/cfgs/default/result/pps.pcap.out
+++ b/tests/cfgs/default/result/pps.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 131 (2.98 pkts/flow)
Confidence Unknown : 29 (flows)
Confidence Match by port : 2 (flows)
Confidence DPI : 76 (flows)
-Num dissector calls: 6131 (57.30 diss/flow)
+Num dissector calls: 6160 (57.57 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/93/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/protonvpn.pcap.out b/tests/cfgs/default/result/protonvpn.pcap.out
index 3ee85544a..7f63006fa 100644
--- a/tests/cfgs/default/result/protonvpn.pcap.out
+++ b/tests/cfgs/default/result/protonvpn.pcap.out
@@ -4,7 +4,7 @@ DPI Packets (TCP): 12 (6.00 pkts/flow)
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 2 (flows)
-Num dissector calls: 151 (50.33 diss/flow)
+Num dissector calls: 152 (50.67 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/quic.pcap.out b/tests/cfgs/default/result/quic.pcap.out
index 3a91112a8..57105bd07 100644
--- a/tests/cfgs/default/result/quic.pcap.out
+++ b/tests/cfgs/default/result/quic.pcap.out
@@ -3,7 +3,7 @@ Guessed flow protos: 1
DPI Packets (UDP): 12 (1.20 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 9 (flows)
-Num dissector calls: 232 (23.20 diss/flow)
+Num dissector calls: 233 (23.30 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/quic_0RTT.pcap.out b/tests/cfgs/default/result/quic_0RTT.pcap.out
index 687838771..cb5da9e4b 100644
--- a/tests/cfgs/default/result/quic_0RTT.pcap.out
+++ b/tests/cfgs/default/result/quic_0RTT.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 4 (2.00 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 212 (106.00 diss/flow)
+Num dissector calls: 213 (106.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/raknet.pcap.out b/tests/cfgs/default/result/raknet.pcap.out
index b3ab18dfb..39a7fc941 100644
--- a/tests/cfgs/default/result/raknet.pcap.out
+++ b/tests/cfgs/default/result/raknet.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 24 (2.00 pkts/flow)
Confidence DPI : 12 (flows)
-Num dissector calls: 1541 (128.42 diss/flow)
+Num dissector calls: 1547 (128.92 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/rdp2.pcap.out b/tests/cfgs/default/result/rdp2.pcap.out
index 47215984b..9ef43b2fb 100644
--- a/tests/cfgs/default/result/rdp2.pcap.out
+++ b/tests/cfgs/default/result/rdp2.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 8 (2.67 pkts/flow)
Confidence DPI : 3 (flows)
-Num dissector calls: 451 (150.33 diss/flow)
+Num dissector calls: 454 (151.33 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/rtp.pcapng.out b/tests/cfgs/default/result/rtp.pcapng.out
index f480cd2c2..e310da1a8 100644
--- a/tests/cfgs/default/result/rtp.pcapng.out
+++ b/tests/cfgs/default/result/rtp.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 9 (3.00 pkts/flow)
Confidence DPI : 3 (flows)
-Num dissector calls: 479 (159.67 diss/flow)
+Num dissector calls: 482 (160.67 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/rx.pcap.out b/tests/cfgs/default/result/rx.pcap.out
index 07f7c76e8..fb80e6207 100644
--- a/tests/cfgs/default/result/rx.pcap.out
+++ b/tests/cfgs/default/result/rx.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 10 (2.00 pkts/flow)
Confidence DPI : 5 (flows)
-Num dissector calls: 755 (151.00 diss/flow)
+Num dissector calls: 760 (152.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/sflow.pcap.out b/tests/cfgs/default/result/sflow.pcap.out
index 0bfea6e23..a43a275ef 100644
--- a/tests/cfgs/default/result/sflow.pcap.out
+++ b/tests/cfgs/default/result/sflow.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 139 (139.00 diss/flow)
+Num dissector calls: 140 (140.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/sip.pcap.out b/tests/cfgs/default/result/sip.pcap.out
index 803904059..3e58a6c34 100644
--- a/tests/cfgs/default/result/sip.pcap.out
+++ b/tests/cfgs/default/result/sip.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 6 (1.50 pkts/flow)
Confidence DPI : 4 (flows)
-Num dissector calls: 212 (53.00 diss/flow)
+Num dissector calls: 213 (53.25 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/sip_hello.pcapng.out b/tests/cfgs/default/result/sip_hello.pcapng.out
index fefb2f439..e19c1e999 100644
--- a/tests/cfgs/default/result/sip_hello.pcapng.out
+++ b/tests/cfgs/default/result/sip_hello.pcapng.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 9 (9.00 pkts/flow)
Confidence DPI : 1 (flows)
-Num dissector calls: 200 (200.00 diss/flow)
+Num dissector calls: 201 (201.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/skinny.pcap.out b/tests/cfgs/default/result/skinny.pcap.out
index 09c4ac32e..f7045daea 100644
--- a/tests/cfgs/default/result/skinny.pcap.out
+++ b/tests/cfgs/default/result/skinny.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 2 (1.00 pkts/flow)
DPI Packets (UDP): 15 (3.00 pkts/flow)
Confidence DPI : 7 (flows)
-Num dissector calls: 777 (111.00 diss/flow)
+Num dissector calls: 782 (111.71 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/softether.pcap.out b/tests/cfgs/default/result/softether.pcap.out
index bdfdec899..16a9812cd 100644
--- a/tests/cfgs/default/result/softether.pcap.out
+++ b/tests/cfgs/default/result/softether.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 4 (4.00 pkts/flow)
DPI Packets (UDP): 31 (10.33 pkts/flow)
Confidence DPI : 4 (flows)
-Num dissector calls: 419 (104.75 diss/flow)
+Num dissector calls: 421 (105.25 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/starcraft_battle.pcap.out b/tests/cfgs/default/result/starcraft_battle.pcap.out
index 498d3c624..362034609 100644
--- a/tests/cfgs/default/result/starcraft_battle.pcap.out
+++ b/tests/cfgs/default/result/starcraft_battle.pcap.out
@@ -6,7 +6,7 @@ DPI Packets (other): 1 (1.00 pkts/flow)
Confidence Match by port : 12 (flows)
Confidence DPI : 39 (flows)
Confidence Match by IP : 1 (flows)
-Num dissector calls: 1681 (32.33 diss/flow)
+Num dissector calls: 1686 (32.42 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/39/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/synscan.pcap.out b/tests/cfgs/default/result/synscan.pcap.out
index f2836a5cd..95b179e06 100644
--- a/tests/cfgs/default/result/synscan.pcap.out
+++ b/tests/cfgs/default/result/synscan.pcap.out
@@ -142,7 +142,7 @@ Unrated 1852 107424 1848
46 TCP 172.16.0.8:36050 -> 64.13.134.52:2605 [proto: 13/BGP][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Network/14][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
47 TCP 172.16.0.8:36050 -> 64.13.134.52:3000 [proto: 26/ntop][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Network/14][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
48 TCP 172.16.0.8:36050 -> 64.13.134.52:3128 [proto: 131/HTTP_Proxy][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Web/5][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
- 49 TCP 172.16.0.8:36050 -> 64.13.134.52:3260 [proto: 407/iSCSI][IP: 0/Unknown][ClearText][Confidence: Match by custom rule][DPI packets: 1][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 49 TCP 172.16.0.8:36050 -> 64.13.134.52:3260 [proto: 408/iSCSI][IP: 0/Unknown][ClearText][Confidence: Match by custom rule][DPI packets: 1][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
50 TCP 172.16.0.8:36050 -> 64.13.134.52:3300 [proto: 381/Ceph][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: DataTransfer/4][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
51 TCP 172.16.0.8:36050 -> 64.13.134.52:3306 [proto: 20/MySQL][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Database/11][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
52 TCP 172.16.0.8:36050 -> 64.13.134.52:3389 [proto: 88/RDP][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: RemoteAccess/12][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Desktop/File Sharing **** Unidirectional Traffic **][Risk Score: 20][Risk Info: No server to client traffic / Found RDP][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
@@ -213,7 +213,7 @@ Unrated 1852 107424 1848
117 TCP 172.16.0.8:36051 -> 64.13.134.52:2605 [proto: 13/BGP][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Network/14][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
118 TCP 172.16.0.8:36051 -> 64.13.134.52:3000 [proto: 26/ntop][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Network/14][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
119 TCP 172.16.0.8:36051 -> 64.13.134.52:3128 [proto: 131/HTTP_Proxy][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Web/5][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
- 120 TCP 172.16.0.8:36051 -> 64.13.134.52:3260 [proto: 407/iSCSI][IP: 0/Unknown][ClearText][Confidence: Match by custom rule][DPI packets: 1][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 120 TCP 172.16.0.8:36051 -> 64.13.134.52:3260 [proto: 408/iSCSI][IP: 0/Unknown][ClearText][Confidence: Match by custom rule][DPI packets: 1][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
121 TCP 172.16.0.8:36051 -> 64.13.134.52:3300 [proto: 381/Ceph][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: DataTransfer/4][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
122 TCP 172.16.0.8:36051 -> 64.13.134.52:3306 [proto: 20/MySQL][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: Database/11][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Unidirectional Traffic **][Risk Score: 10][Risk Info: No server to client traffic][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
123 TCP 172.16.0.8:36051 -> 64.13.134.52:3389 [proto: 88/RDP][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 1][cat: RemoteAccess/12][1 pkts/58 bytes -> 0 pkts/0 bytes][Goodput ratio: 0/0][< 1 sec][Risk: ** Desktop/File Sharing **** Unidirectional Traffic **][Risk Score: 20][Risk Info: No server to client traffic / Found RDP][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
diff --git a/tests/cfgs/default/result/teams.pcap.out b/tests/cfgs/default/result/teams.pcap.out
index f14a35136..ca501429d 100644
--- a/tests/cfgs/default/result/teams.pcap.out
+++ b/tests/cfgs/default/result/teams.pcap.out
@@ -7,7 +7,7 @@ Confidence Unknown : 1 (flows)
Confidence Match by port : 1 (flows)
Confidence DPI (partial) : 5 (flows)
Confidence DPI : 76 (flows)
-Num dissector calls: 534 (6.43 diss/flow)
+Num dissector calls: 535 (6.45 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/teamspeak3.pcap.out b/tests/cfgs/default/result/teamspeak3.pcap.out
index 1344c6a5d..c62dc670b 100644
--- a/tests/cfgs/default/result/teamspeak3.pcap.out
+++ b/tests/cfgs/default/result/teamspeak3.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 4 (2.00 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 220 (110.00 diss/flow)
+Num dissector calls: 221 (110.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/teamviewer.pcap.out b/tests/cfgs/default/result/teamviewer.pcap.out
index 07c766ab3..7abddcaf4 100644
--- a/tests/cfgs/default/result/teamviewer.pcap.out
+++ b/tests/cfgs/default/result/teamviewer.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (TCP): 4 (4.00 pkts/flow)
DPI Packets (UDP): 4 (4.00 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 180 (90.00 diss/flow)
+Num dissector calls: 181 (90.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/telegram.pcap.out b/tests/cfgs/default/result/telegram.pcap.out
index 831c5e9f6..d62da5ba5 100644
--- a/tests/cfgs/default/result/telegram.pcap.out
+++ b/tests/cfgs/default/result/telegram.pcap.out
@@ -1,7 +1,7 @@
DPI Packets (UDP): 81 (1.69 pkts/flow)
Confidence Unknown : 3 (flows)
Confidence DPI : 45 (flows)
-Num dissector calls: 1582 (32.96 diss/flow)
+Num dissector calls: 1585 (33.02 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/9/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/tencent_games.pcap.out b/tests/cfgs/default/result/tencent_games.pcap.out
index 3803ecfa7..481ec0184 100644
--- a/tests/cfgs/default/result/tencent_games.pcap.out
+++ b/tests/cfgs/default/result/tencent_games.pcap.out
@@ -1,6 +1,6 @@
-DPI Packets (TCP): 4 (4.00 pkts/flow)
-Confidence DPI : 1 (flows)
-Num dissector calls: 144 (144.00 diss/flow)
+DPI Packets (TCP): 16 (4.00 pkts/flow)
+Confidence DPI : 4 (flows)
+Num dissector calls: 576 (144.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
@@ -18,11 +18,14 @@ Patricia risk mask: 0/0 (search/found)
Patricia risk mask IPv6: 0/0 (search/found)
Patricia risk: 0/0 (search/found)
Patricia risk IPv6: 0/0 (search/found)
-Patricia protocols: 1/1 (search/found)
+Patricia protocols: 4/4 (search/found)
Patricia protocols IPv6: 0/0 (search/found)
-TencentGames 10 818 1
+TencentGames 32 5618 4
-Fun 10 818 1
+Fun 32 5618 4
- 1 TCP 10.215.173.1:43300 <-> 43.130.19.227:65010 [proto: 395/TencentGames][IP: 285/Tencent][ClearText][Confidence: DPI][DPI packets: 4][cat: Game/8][5 pkts/413 bytes <-> 5 pkts/405 bytes][Goodput ratio: 47/49][0.61 sec][bytes ratio: 0.010 (Mixed)][IAT c2s/s2c min/avg/max/stddev: 4/4 103/104 200/200 95/96][Pkt Len c2s/s2c min/avg/max/stddev: 40/40 83/81 157/173 46/52][PLAIN TEXT (9089499565149320430)][Plen Bins: 0,0,50,25,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 1 TCP 10.215.173.1:47046 <-> 129.226.103.74:31003 [proto: 395/TencentGames][IP: 285/Tencent][ClearText][Confidence: DPI][DPI packets: 4][cat: Game/8][5 pkts/678 bytes <-> 5 pkts/2540 bytes][Goodput ratio: 67/92][1.09 sec][bytes ratio: -0.579 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 181/121 362/362 181/170][Pkt Len c2s/s2c min/avg/max/stddev: 40/40 136/508 498/2372 181/932][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50]
+ 2 TCP 10.215.173.1:42864 <-> 162.62.116.201:20731 [proto: 395/TencentGames][IP: 285/Tencent][ClearText][Confidence: DPI][DPI packets: 4][cat: Game/8][4 pkts/951 bytes <-> 2 pkts/88 bytes][Goodput ratio: 81/0][0.23 sec][bytes ratio: 0.831 (Upload)][IAT c2s/s2c min/avg/max/stddev: 32/124 75/124 124/124 38/0][Pkt Len c2s/s2c min/avg/max/stddev: 40/40 238/44 473/48 191/4][Plen Bins: 0,0,0,0,0,0,0,0,0,0,50,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 3 TCP 10.215.173.1:43300 <-> 43.130.19.227:65010 [proto: 395/TencentGames][IP: 285/Tencent][ClearText][Confidence: DPI][DPI packets: 4][cat: Game/8][5 pkts/413 bytes <-> 5 pkts/405 bytes][Goodput ratio: 47/49][0.61 sec][bytes ratio: 0.010 (Mixed)][IAT c2s/s2c min/avg/max/stddev: 4/4 103/104 200/200 95/96][Pkt Len c2s/s2c min/avg/max/stddev: 40/40 83/81 157/173 46/52][PLAIN TEXT (9089499565149320430)][Plen Bins: 0,0,50,25,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 4 TCP 10.215.173.1:46658 <-> 162.62.97.166:8085 [proto: 395/TencentGames][IP: 285/Tencent][ClearText][Confidence: DPI][DPI packets: 4][cat: Game/8][3 pkts/290 bytes <-> 3 pkts/253 bytes][Goodput ratio: 52/49][0.17 sec][bytes ratio: 0.068 (Mixed)][IAT c2s/s2c min/avg/max/stddev: 7/8 51/40 95/71 44/32][Pkt Len c2s/s2c min/avg/max/stddev: 40/40 97/84 190/165 66/57][PLAIN TEXT (gcloud)][Plen Bins: 0,0,0,50,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
diff --git a/tests/cfgs/default/result/tftp.pcap.out b/tests/cfgs/default/result/tftp.pcap.out
index b937b960a..84341fc07 100644
--- a/tests/cfgs/default/result/tftp.pcap.out
+++ b/tests/cfgs/default/result/tftp.pcap.out
@@ -3,7 +3,7 @@ Guessed flow protos: 2
DPI Packets (UDP): 15 (1.67 pkts/flow)
Confidence Match by port : 2 (flows)
Confidence DPI : 7 (flows)
-Num dissector calls: 673 (74.78 diss/flow)
+Num dissector calls: 677 (75.22 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/toca-boca.pcap.out b/tests/cfgs/default/result/toca-boca.pcap.out
index cd717fc72..b69671fa0 100644
--- a/tests/cfgs/default/result/toca-boca.pcap.out
+++ b/tests/cfgs/default/result/toca-boca.pcap.out
@@ -3,7 +3,7 @@ Guessed flow protos: 4
DPI Packets (UDP): 21 (1.00 pkts/flow)
Confidence Match by port : 4 (flows)
Confidence DPI : 17 (flows)
-Num dissector calls: 569 (27.10 diss/flow)
+Num dissector calls: 573 (27.29 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/viber.pcap.out b/tests/cfgs/default/result/viber.pcap.out
index 926d6ed53..61400fca9 100644
--- a/tests/cfgs/default/result/viber.pcap.out
+++ b/tests/cfgs/default/result/viber.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 27 (1.93 pkts/flow)
DPI Packets (other): 2 (1.00 pkts/flow)
Confidence Match by port : 4 (flows)
Confidence DPI : 25 (flows)
-Num dissector calls: 469 (16.17 diss/flow)
+Num dissector calls: 470 (16.21 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/webex.pcap.out b/tests/cfgs/default/result/webex.pcap.out
index 77e464c83..3b4e3c8f5 100644
--- a/tests/cfgs/default/result/webex.pcap.out
+++ b/tests/cfgs/default/result/webex.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 13 (6.50 pkts/flow)
Confidence Match by port : 3 (flows)
Confidence DPI : 53 (flows)
Confidence Match by IP : 1 (flows)
-Num dissector calls: 282 (4.95 diss/flow)
+Num dissector calls: 283 (4.96 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/weibo.pcap.out b/tests/cfgs/default/result/weibo.pcap.out
index 91a7b21e2..b8e63c80f 100644
--- a/tests/cfgs/default/result/weibo.pcap.out
+++ b/tests/cfgs/default/result/weibo.pcap.out
@@ -4,7 +4,7 @@ DPI Packets (TCP): 100 (3.33 pkts/flow)
DPI Packets (UDP): 42 (3.00 pkts/flow)
Confidence Match by port : 21 (flows)
Confidence DPI : 23 (flows)
-Num dissector calls: 560 (12.73 diss/flow)
+Num dissector calls: 562 (12.77 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/63/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/wireguard.pcap.out b/tests/cfgs/default/result/wireguard.pcap.out
index 2fdb86928..96912d859 100644
--- a/tests/cfgs/default/result/wireguard.pcap.out
+++ b/tests/cfgs/default/result/wireguard.pcap.out
@@ -1,6 +1,6 @@
DPI Packets (UDP): 6 (3.00 pkts/flow)
Confidence DPI : 2 (flows)
-Num dissector calls: 311 (155.50 diss/flow)
+Num dissector calls: 313 (156.50 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/zoom.pcap.out b/tests/cfgs/default/result/zoom.pcap.out
index 7b594a6b1..ff3747b5c 100644
--- a/tests/cfgs/default/result/zoom.pcap.out
+++ b/tests/cfgs/default/result/zoom.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 40 (2.22 pkts/flow)
DPI Packets (other): 2 (1.00 pkts/flow)
Confidence Match by port : 2 (flows)
Confidence DPI : 32 (flows)
-Num dissector calls: 1054 (31.00 diss/flow)
+Num dissector calls: 1055 (31.03 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 8/0/0 (insert/search/found)
diff --git a/tests/cfgs/default/result/zoom_p2p.pcapng.out b/tests/cfgs/default/result/zoom_p2p.pcapng.out
index 9c6eec15a..7d9ba148a 100644
--- a/tests/cfgs/default/result/zoom_p2p.pcapng.out
+++ b/tests/cfgs/default/result/zoom_p2p.pcapng.out
@@ -4,7 +4,7 @@ DPI Packets (UDP): 49 (4.90 pkts/flow)
DPI Packets (other): 2 (1.00 pkts/flow)
Confidence DPI (partial cache): 4 (flows)
Confidence DPI : 8 (flows)
-Num dissector calls: 845 (70.42 diss/flow)
+Num dissector calls: 849 (70.75 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/disable_protocols/result/dns_long_domainname.pcap.out b/tests/cfgs/disable_protocols/result/dns_long_domainname.pcap.out
index 87cf4be15..244d5b096 100644
--- a/tests/cfgs/disable_protocols/result/dns_long_domainname.pcap.out
+++ b/tests/cfgs/disable_protocols/result/dns_long_domainname.pcap.out
@@ -2,7 +2,7 @@ Guessed flow protos: 1
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence Match by IP : 1 (flows)
-Num dissector calls: 150 (150.00 diss/flow)
+Num dissector calls: 151 (151.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/disable_protocols/result/quic-mvfst-27.pcapng.out b/tests/cfgs/disable_protocols/result/quic-mvfst-27.pcapng.out
index b84732f7a..34e5d3274 100644
--- a/tests/cfgs/disable_protocols/result/quic-mvfst-27.pcapng.out
+++ b/tests/cfgs/disable_protocols/result/quic-mvfst-27.pcapng.out
@@ -2,7 +2,7 @@ Guessed flow protos: 1
DPI Packets (UDP): 12 (12.00 pkts/flow)
Confidence Match by IP : 1 (flows)
-Num dissector calls: 192 (192.00 diss/flow)
+Num dissector calls: 193 (193.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/3/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/enable_payload_stat/result/1kxun.pcap.out b/tests/cfgs/enable_payload_stat/result/1kxun.pcap.out
index 24a0e2d43..387baf17d 100644
--- a/tests/cfgs/enable_payload_stat/result/1kxun.pcap.out
+++ b/tests/cfgs/enable_payload_stat/result/1kxun.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 120 (1.21 pkts/flow)
Confidence Unknown : 14 (flows)
Confidence Match by port : 6 (flows)
Confidence DPI : 177 (flows)
-Num dissector calls: 5038 (25.57 diss/flow)
+Num dissector calls: 5052 (25.64 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/60/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/flow_risk_lists_disable/result/protonvpn.pcap.out b/tests/cfgs/flow_risk_lists_disable/result/protonvpn.pcap.out
index dc1ee293c..6a9bce607 100644
--- a/tests/cfgs/flow_risk_lists_disable/result/protonvpn.pcap.out
+++ b/tests/cfgs/flow_risk_lists_disable/result/protonvpn.pcap.out
@@ -4,7 +4,7 @@ DPI Packets (TCP): 12 (6.00 pkts/flow)
DPI Packets (UDP): 2 (2.00 pkts/flow)
Confidence Match by port : 1 (flows)
Confidence DPI : 2 (flows)
-Num dissector calls: 151 (50.33 diss/flow)
+Num dissector calls: 152 (50.67 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/6/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/guessing_disable/result/webex.pcap.out b/tests/cfgs/guessing_disable/result/webex.pcap.out
index 6f978fc58..11402658e 100644
--- a/tests/cfgs/guessing_disable/result/webex.pcap.out
+++ b/tests/cfgs/guessing_disable/result/webex.pcap.out
@@ -2,7 +2,7 @@ DPI Packets (TCP): 395 (7.18 pkts/flow)
DPI Packets (UDP): 13 (6.50 pkts/flow)
Confidence Unknown : 4 (flows)
Confidence DPI : 53 (flows)
-Num dissector calls: 282 (4.95 diss/flow)
+Num dissector calls: 283 (4.96 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/12/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/tests/cfgs/ip_lists_disable/result/1kxun.pcap.out b/tests/cfgs/ip_lists_disable/result/1kxun.pcap.out
index 5823fa919..32be16cb0 100644
--- a/tests/cfgs/ip_lists_disable/result/1kxun.pcap.out
+++ b/tests/cfgs/ip_lists_disable/result/1kxun.pcap.out
@@ -5,7 +5,7 @@ DPI Packets (UDP): 120 (1.21 pkts/flow)
Confidence Unknown : 14 (flows)
Confidence Match by port : 6 (flows)
Confidence DPI : 177 (flows)
-Num dissector calls: 5038 (25.57 diss/flow)
+Num dissector calls: 5052 (25.64 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/60/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
diff --git a/windows/nDPI.vcxproj b/windows/nDPI.vcxproj
index 034680443..8c4788df3 100644
--- a/windows/nDPI.vcxproj
+++ b/windows/nDPI.vcxproj
@@ -137,7 +137,7 @@
<ClCompile Include="..\src\lib\ndpi_binary_bitmap.c" />
<ClCompile Include="..\src\lib\ndpi_hash.c" />
<ClCompile Include="..\src\lib\ndpi_domain_classify.c" />
- <ClCompile Include="..\src\lib\ndpi_bitmap64.c" />
+ <ClCompile Include="..\src\lib\ndpi_bitmap64_fuse.c" />
<ClCompile Include="..\src\lib\protocols\activision.c" />
<ClCompile Include="..\src\lib\protocols\afp.c" />
<ClCompile Include="..\src\lib\protocols\ajp.c" />
@@ -372,6 +372,7 @@
<ClCompile Include="..\src\lib\protocols\path_of_exile.c" />
<ClCompile Include="..\src\lib\protocols\pfcp.c" />
<ClCompile Include="..\src\lib\protocols\flute.c" />
+ <ClCompile Include="..\src\lib\protocols\lol_wild_rift.c" />
<ClCompile Include="..\src\lib\third_party\src\gcrypt_light.c" />
<ClCompile Include="..\src\lib\third_party\src\libcache.c" />
<ClCompile Include="..\src\lib\third_party\src\libinjection_html5.c" />
diff --git a/windows/nDPI.vcxproj.filters b/windows/nDPI.vcxproj.filters
index b94033518..a6334312f 100644
--- a/windows/nDPI.vcxproj.filters
+++ b/windows/nDPI.vcxproj.filters
@@ -147,6 +147,7 @@
<ClCompile Include="..\src\lib\protocols\path_of_exile.c" />
<ClCompile Include="..\src\lib\protocols\pfcp.c" />
<ClCompile Include="..\src\lib\protocols\flute.c" />
+ <ClCompile Include="..\src\lib\protocols\lol_wild_rift.c" />
<ClCompile Include="src\getopt.c" />
<ClCompile Include="src\win-gettimeofday.c" />
<ClCompile Include="..\src\lib\ndpi_analyze.c" />