about | summary | refs | log | tree | commit | diff
path: root/tests/performance/strnstr.cpp
diff options
context:
space:
mode:
author: Vladimir Gavrilov <105977161+0xA50C1A1@users.noreply.github.com> 2024-05-22 13:47:27 +0300
committer: GitHub <noreply@github.com> 2024-05-22 12:47:27 +0200
commit: 15643547fed19d8aa28ffcc7ea083092d199d499 (patch)
tree: 79f6164ad2575e8b81b426c1cd5d37ea1ed73130 /tests/performance/strnstr.cpp
parent: 5a25f89ab364078dd1478f56da4d3e2154784f1a (diff)
Replace ndpi_strnstr() implementation with an optimal one (#2447)
Diffstat (limited to 'tests/performance/strnstr.cpp')
-rw-r--r-- tests/performance/strnstr.cpp 96
1 files changed, 63 insertions, 33 deletions
diff --git a/tests/performance/strnstr.cpp b/tests/performance/strnstr.cpp
index 84922150a..7c3b3ba7d 100644
--- a/tests/performance/strnstr.cpp
+++ b/tests/performance/strnstr.cpp
@@ -1,13 +1,13 @@
#include <algorithm>
#include <chrono>
-#include <cmath>
#include <cstring>
#include <functional>
-#include <iomanip>
#include <iostream>
+#include <limits>
#include <map>
#include <random>
#include <string>
+#include <tuple>
#include <vector>
char *ndpi_strnstr(const char *s, const char *find, size_t slen) {
@@ -30,40 +30,42 @@ char *ndpi_strnstr(const char *s, const char *find, size_t slen) {
return ((char *)s);
}
-char *ndpi_strnstr_opt(const char *s, const char *find, size_t slen) {
- if (s == NULL || find == NULL || slen == 0) {
+char *ndpi_strnstr_opt(const char *haystack, const char *needle, size_t len) {
+ if (!haystack || !needle || len == 0) {
return NULL;
}
- char c = *find;
+ size_t needle_len = strlen(needle);
+ size_t hs_real_len = strnlen(haystack, len);
- if (c == '\0') {
- return (char *)s;
+ if (needle_len == 0) {
+ return (char *)haystack;
}
- if (*(find + 1) == '\0') {
- return (char *)memchr(s, c, slen);
- }
-
- size_t find_len = strnlen(find, slen);
-
- if (find_len > slen) {
+ if (needle_len > hs_real_len) {
return NULL;
}
- const char *end = s + slen - find_len;
+ if (needle_len == 1) {
+ return (char *)memchr(haystack, *needle, hs_real_len);
+ }
- while (s <= end) {
- if (memcmp(s, find, find_len) == 0) {
- return (char *)s;
- }
+ const char *current = haystack;
+ const char *haystack_end = haystack + hs_real_len;
- size_t remaining_length = end - s;
- s = (char *)memchr(s + 1, c, remaining_length);
+ while (current <= haystack_end - needle_len) {
+ current = (const char *)memchr(current, *needle, haystack_end - current);
- if (s == NULL || s > end) {
+ if (!current) {
return NULL;
}
+
+ if ((current + needle_len <= haystack_end) &&
+ memcmp(current, needle, needle_len) == 0) {
+ return (char *)current;
+ }
+
+ current++;
}
return NULL;
@@ -80,10 +82,9 @@ std::string random_string(size_t length, std::mt19937 &gen) {
double measure_time(const std::function<char *(const char *, const char *,
size_t)> &strnstr_impl,
- const std::string &haystack, const std::string &needle,
- std::mt19937 &gen) {
+ const std::string &haystack, const std::string &needle) {
auto start = std::chrono::high_resolution_clock::now();
- // Call the function to prevent optimization
+
volatile auto result =
strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size());
auto end = std::chrono::high_resolution_clock::now();
@@ -92,6 +93,31 @@ double measure_time(const std::function<char *(const char *, const char *,
.count();
}
+void warm_up(const std::function<char *(const char *, const char *, size_t)>
+ &strnstr_impl,
+ const std::string &haystack, const std::string &needle,
+ int iterations) {
+ for (int i = 0; i < iterations; i++) {
+ volatile auto result =
+ strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size());
+ }
+}
+
+double average_without_extremes(const std::vector<double> &times) {
+ if (times.size() < 5) {
+ return std::accumulate(times.begin(), times.end(), 0.0) /
+ static_cast<double>(times.size());
+ }
+
+ auto sorted_times = times;
+ std::sort(sorted_times.begin(), sorted_times.end());
+ sorted_times.erase(sorted_times.begin());
+ sorted_times.pop_back();
+
+ return std::accumulate(sorted_times.begin(), sorted_times.end(), 0.0) /
+ sorted_times.size();
+}
+
int main() {
std::ios_base::sync_with_stdio(false);
std::mt19937 gen(std::random_device{}());
@@ -105,10 +131,13 @@ int main() {
const std::vector<std::pair<
std::string, std::function<char *(const char *, const char *, size_t)>>>
strnstr_impls = {
- {"ndpi_strnstr", ndpi_strnstr}, {"ndpi_strnstr_opt", ndpi_strnstr_opt}
- // Add other implementations for comparison here
+ {"ndpi_strnstr", ndpi_strnstr},
+ {"ndpi_strnstr_opt", ndpi_strnstr_opt},
};
+ const int iterations = 100000;
+ const int warm_up_iterations = 1000;
+
for (size_t haystack_len : haystack_lengths) {
for (size_t needle_len : needle_lengths) {
std::cout << "\nTest case - Haystack length: " << haystack_len
@@ -120,19 +149,20 @@ int main() {
std::map<std::string, double> times;
for (const auto &impl : strnstr_impls) {
- double time_sum = 0.0;
- for (int i = 0; i < 100000; i++) {
- time_sum += measure_time(impl.second, haystack, needle, gen);
+ warm_up(impl.second, haystack, needle, warm_up_iterations);
+
+ std::vector<double> times_vector;
+ for (int i = 0; i < iterations; i++) {
+ times_vector.push_back(measure_time(impl.second, haystack, needle));
}
- double average_time =
- time_sum / 100000.0; // Average time in nanoseconds
+
+ double average_time = average_without_extremes(times_vector);
times[impl.first] = average_time;
std::cout << "Average time for " << impl.first << ": " << average_time
<< " ns\n";
}
- // Compare execution times between implementations
std::string fastest_impl;
double fastest_time = std::numeric_limits<double>::max();
for (const auto &impl_time : times) {