diff options
author | Vladimir Gavrilov <105977161+0xA50C1A1@users.noreply.github.com> | 2024-05-22 13:47:27 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 12:47:27 +0200 |
commit | 15643547fed19d8aa28ffcc7ea083092d199d499 (patch) | |
tree | 79f6164ad2575e8b81b426c1cd5d37ea1ed73130 /tests/performance/strnstr.cpp | |
parent | 5a25f89ab364078dd1478f56da4d3e2154784f1a (diff) |
Replace ndpi_strnstr() implementation with an optimal one (#2447)
Diffstat (limited to 'tests/performance/strnstr.cpp')
-rw-r--r-- | tests/performance/strnstr.cpp | 96 |
1 files changed, 63 insertions, 33 deletions
diff --git a/tests/performance/strnstr.cpp b/tests/performance/strnstr.cpp index 84922150a..7c3b3ba7d 100644 --- a/tests/performance/strnstr.cpp +++ b/tests/performance/strnstr.cpp @@ -1,13 +1,13 @@ #include <algorithm> #include <chrono> -#include <cmath> #include <cstring> #include <functional> -#include <iomanip> #include <iostream> +#include <limits> #include <map> #include <random> #include <string> +#include <tuple> #include <vector> char *ndpi_strnstr(const char *s, const char *find, size_t slen) { @@ -30,40 +30,42 @@ char *ndpi_strnstr(const char *s, const char *find, size_t slen) { return ((char *)s); } -char *ndpi_strnstr_opt(const char *s, const char *find, size_t slen) { - if (s == NULL || find == NULL || slen == 0) { +char *ndpi_strnstr_opt(const char *haystack, const char *needle, size_t len) { + if (!haystack || !needle || len == 0) { return NULL; } - char c = *find; + size_t needle_len = strlen(needle); + size_t hs_real_len = strnlen(haystack, len); - if (c == '\0') { - return (char *)s; + if (needle_len == 0) { + return (char *)haystack; } - if (*(find + 1) == '\0') { - return (char *)memchr(s, c, slen); - } - - size_t find_len = strnlen(find, slen); - - if (find_len > slen) { + if (needle_len > hs_real_len) { return NULL; } - const char *end = s + slen - find_len; + if (needle_len == 1) { + return (char *)memchr(haystack, *needle, hs_real_len); + } - while (s <= end) { - if (memcmp(s, find, find_len) == 0) { - return (char *)s; - } + const char *current = haystack; + const char *haystack_end = haystack + hs_real_len; - size_t remaining_length = end - s; - s = (char *)memchr(s + 1, c, remaining_length); + while (current <= haystack_end - needle_len) { + current = (const char *)memchr(current, *needle, haystack_end - current); - if (s == NULL || s > end) { + if (!current) { return NULL; } + + if ((current + needle_len <= haystack_end) && + memcmp(current, needle, needle_len) == 0) { + return (char *)current; + } + + current++; } return NULL; @@ -80,10 +82,9 @@ std::string random_string(size_t length, std::mt19937 &gen) { double measure_time(const std::function<char *(const char *, const char *, size_t)> &strnstr_impl, - const std::string &haystack, const std::string &needle, - std::mt19937 &gen) { + const std::string &haystack, const std::string &needle) { auto start = std::chrono::high_resolution_clock::now(); - // Call the function to prevent optimization + volatile auto result = strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size()); auto end = std::chrono::high_resolution_clock::now(); @@ -92,6 +93,31 @@ double measure_time(const std::function<char *(const char *, const char *, .count(); } +void warm_up(const std::function<char *(const char *, const char *, size_t)> + &strnstr_impl, + const std::string &haystack, const std::string &needle, + int iterations) { + for (int i = 0; i < iterations; i++) { + volatile auto result = + strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size()); + } +} + +double average_without_extremes(const std::vector<double> ×) { + if (times.size() < 5) { + return std::accumulate(times.begin(), times.end(), 0.0) / + static_cast<double>(times.size()); + } + + auto sorted_times = times; + std::sort(sorted_times.begin(), sorted_times.end()); + sorted_times.erase(sorted_times.begin()); + sorted_times.pop_back(); + + return std::accumulate(sorted_times.begin(), sorted_times.end(), 0.0) / + sorted_times.size(); +} + int main() { std::ios_base::sync_with_stdio(false); std::mt19937 gen(std::random_device{}()); @@ -105,10 +131,13 @@ int main() { const std::vector<std::pair< std::string, std::function<char *(const char *, const char *, size_t)>>> strnstr_impls = { - {"ndpi_strnstr", ndpi_strnstr}, {"ndpi_strnstr_opt", ndpi_strnstr_opt} - // Add other implementations for comparison here + {"ndpi_strnstr", ndpi_strnstr}, + {"ndpi_strnstr_opt", ndpi_strnstr_opt}, }; + const int iterations = 100000; + const int warm_up_iterations = 1000; + for (size_t haystack_len : haystack_lengths) { for (size_t needle_len : needle_lengths) { std::cout << "\nTest case - Haystack length: " << haystack_len @@ -120,19 +149,20 @@ int main() { std::map<std::string, double> times; for (const auto &impl : strnstr_impls) { - double time_sum = 0.0; - for (int i = 0; i < 100000; i++) { - time_sum += measure_time(impl.second, haystack, needle, gen); + warm_up(impl.second, haystack, needle, warm_up_iterations); + + std::vector<double> times_vector; + for (int i = 0; i < iterations; i++) { + times_vector.push_back(measure_time(impl.second, haystack, needle)); } - double average_time = - time_sum / 100000.0; // Average time in nanoseconds + + double average_time = average_without_extremes(times_vector); times[impl.first] = average_time; std::cout << "Average time for " << impl.first << ": " << average_time << " ns\n"; } - // Compare execution times between implementations std::string fastest_impl; double fastest_time = std::numeric_limits<double>::max(); for (const auto &impl_time : times) { |