aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Gavrilov <105977161+0xA50C1A1@users.noreply.github.com>2024-05-22 13:47:27 +0300
committerGitHub <noreply@github.com>2024-05-22 12:47:27 +0200
commit15643547fed19d8aa28ffcc7ea083092d199d499 (patch)
tree79f6164ad2575e8b81b426c1cd5d37ea1ed73130
parent5a25f89ab364078dd1478f56da4d3e2154784f1a (diff)
Replace ndpi_strnstr() implementation with an optimal one (#2447)
-rw-r--r--src/include/ndpi_api.h16
-rw-r--r--src/lib/ndpi_main.c66
-rw-r--r--tests/performance/strnstr.cpp96
3 files changed, 114 insertions, 64 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index ca365b8fe..fa1592863 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -115,17 +115,17 @@ extern "C" {
u_int32_t ndpi_get_tot_allocated_memory(void);
/**
- * Search the first occurrence of substring -find- in -s-
- * The search is limited to the first -slen- characters of the string
+ * Finds the first occurrence of the substring 'needle' in the string 'haystack'.
*
- * @par s = string to parse
- * @par find = string to match with -s-
- * @par slen = max length to match between -s- and -find-
- * @return a pointer to the beginning of the located substring;
- * NULL if the substring is not found
+ * This function is similar to the standard `strstr()` function, but it has an additional parameter `len` that
+ * specifies the maximum length of the search.
*
+ * @param haystack The string to search in.
+ * @param needle The substring to search for.
+ * @param len The maximum length of the search.
+ * @return Pointer to the first occurrence of 'needle' in 'haystack', or NULL if no match is found.
*/
- char* ndpi_strnstr(const char *s, const char *find, size_t slen);
+ char *ndpi_strnstr(const char *haystack, const char *needle, size_t len);
/**
* Same as ndpi_strnstr but case insensitive
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index d7eb5c282..c8675ecfc 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -9610,32 +9610,52 @@ void ndpi_dump_risks_score(FILE *risk_out) {
/* ****************************************************** */
-/*
- * Find the first occurrence of find in s, where the search is limited to the
- * first slen characters of s.
- */
-char *ndpi_strnstr(const char *s, const char *find, size_t slen) {
- char c;
- size_t len;
+char *ndpi_strnstr(const char *haystack, const char *needle, size_t len)
+{
+ if (!haystack || !needle || len == 0)
+ {
+ return NULL;
+ }
+
+ size_t needle_len = strlen(needle);
+ size_t hs_real_len = strnlen(haystack, len);
- if(s == NULL || find == NULL || slen == 0)
+ if (needle_len == 0)
+ {
+ return (char *)haystack;
+ }
+
+ if (needle_len > hs_real_len)
+ {
return NULL;
+ }
+
+ if (needle_len == 1)
+ {
+ return (char *)memchr(haystack, *needle, hs_real_len);
+ }
+
+ const char *current = haystack;
+ const char *haystack_end = haystack + hs_real_len;
+
+ while (current <= haystack_end - needle_len)
+ {
+ current = (const char *)memchr(current, *needle, haystack_end - current);
+
+ if (!current)
+ {
+ return NULL;
+ }
- if((c = *find++) != '\0') {
- len = strnlen(find, slen);
- do {
- char sc;
-
- do {
- if(slen-- < 1 || (sc = *s++) == '\0')
- return(NULL);
- } while(sc != c);
- if(len > slen)
- return(NULL);
- } while(strncmp(s, find, len) != 0);
- s--;
- }
- return((char *) s);
+ if ((current + needle_len <= haystack_end) && memcmp(current, needle, needle_len) == 0)
+ {
+ return (char *)current;
+ }
+
+ current++;
+ }
+
+ return NULL;
}
/* ****************************************************** */
diff --git a/tests/performance/strnstr.cpp b/tests/performance/strnstr.cpp
index 84922150a..7c3b3ba7d 100644
--- a/tests/performance/strnstr.cpp
+++ b/tests/performance/strnstr.cpp
@@ -1,13 +1,13 @@
#include <algorithm>
#include <chrono>
-#include <cmath>
#include <cstring>
#include <functional>
-#include <iomanip>
#include <iostream>
+#include <limits>
#include <map>
#include <random>
#include <string>
+#include <tuple>
#include <vector>
char *ndpi_strnstr(const char *s, const char *find, size_t slen) {
@@ -30,40 +30,42 @@ char *ndpi_strnstr(const char *s, const char *find, size_t slen) {
return ((char *)s);
}
-char *ndpi_strnstr_opt(const char *s, const char *find, size_t slen) {
- if (s == NULL || find == NULL || slen == 0) {
+char *ndpi_strnstr_opt(const char *haystack, const char *needle, size_t len) {
+ if (!haystack || !needle || len == 0) {
return NULL;
}
- char c = *find;
+ size_t needle_len = strlen(needle);
+ size_t hs_real_len = strnlen(haystack, len);
- if (c == '\0') {
- return (char *)s;
+ if (needle_len == 0) {
+ return (char *)haystack;
}
- if (*(find + 1) == '\0') {
- return (char *)memchr(s, c, slen);
- }
-
- size_t find_len = strnlen(find, slen);
-
- if (find_len > slen) {
+ if (needle_len > hs_real_len) {
return NULL;
}
- const char *end = s + slen - find_len;
+ if (needle_len == 1) {
+ return (char *)memchr(haystack, *needle, hs_real_len);
+ }
- while (s <= end) {
- if (memcmp(s, find, find_len) == 0) {
- return (char *)s;
- }
+ const char *current = haystack;
+ const char *haystack_end = haystack + hs_real_len;
- size_t remaining_length = end - s;
- s = (char *)memchr(s + 1, c, remaining_length);
+ while (current <= haystack_end - needle_len) {
+ current = (const char *)memchr(current, *needle, haystack_end - current);
- if (s == NULL || s > end) {
+ if (!current) {
return NULL;
}
+
+ if ((current + needle_len <= haystack_end) &&
+ memcmp(current, needle, needle_len) == 0) {
+ return (char *)current;
+ }
+
+ current++;
}
return NULL;
@@ -80,10 +82,9 @@ std::string random_string(size_t length, std::mt19937 &gen) {
double measure_time(const std::function<char *(const char *, const char *,
size_t)> &strnstr_impl,
- const std::string &haystack, const std::string &needle,
- std::mt19937 &gen) {
+ const std::string &haystack, const std::string &needle) {
auto start = std::chrono::high_resolution_clock::now();
- // Call the function to prevent optimization
+
volatile auto result =
strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size());
auto end = std::chrono::high_resolution_clock::now();
@@ -92,6 +93,31 @@ double measure_time(const std::function<char *(const char *, const char *,
.count();
}
+void warm_up(const std::function<char *(const char *, const char *, size_t)>
+ &strnstr_impl,
+ const std::string &haystack, const std::string &needle,
+ int iterations) {
+ for (int i = 0; i < iterations; i++) {
+ volatile auto result =
+ strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size());
+ }
+}
+
+double average_without_extremes(const std::vector<double> &times) {
+ if (times.size() < 5) {
+ return std::accumulate(times.begin(), times.end(), 0.0) /
+ static_cast<double>(times.size());
+ }
+
+ auto sorted_times = times;
+ std::sort(sorted_times.begin(), sorted_times.end());
+ sorted_times.erase(sorted_times.begin());
+ sorted_times.pop_back();
+
+ return std::accumulate(sorted_times.begin(), sorted_times.end(), 0.0) /
+ sorted_times.size();
+}
+
int main() {
std::ios_base::sync_with_stdio(false);
std::mt19937 gen(std::random_device{}());
@@ -105,10 +131,13 @@ int main() {
const std::vector<std::pair<
std::string, std::function<char *(const char *, const char *, size_t)>>>
strnstr_impls = {
- {"ndpi_strnstr", ndpi_strnstr}, {"ndpi_strnstr_opt", ndpi_strnstr_opt}
- // Add other implementations for comparison here
+ {"ndpi_strnstr", ndpi_strnstr},
+ {"ndpi_strnstr_opt", ndpi_strnstr_opt},
};
+ const int iterations = 100000;
+ const int warm_up_iterations = 1000;
+
for (size_t haystack_len : haystack_lengths) {
for (size_t needle_len : needle_lengths) {
std::cout << "\nTest case - Haystack length: " << haystack_len
@@ -120,19 +149,20 @@ int main() {
std::map<std::string, double> times;
for (const auto &impl : strnstr_impls) {
- double time_sum = 0.0;
- for (int i = 0; i < 100000; i++) {
- time_sum += measure_time(impl.second, haystack, needle, gen);
+ warm_up(impl.second, haystack, needle, warm_up_iterations);
+
+ std::vector<double> times_vector;
+ for (int i = 0; i < iterations; i++) {
+ times_vector.push_back(measure_time(impl.second, haystack, needle));
}
- double average_time =
- time_sum / 100000.0; // Average time in nanoseconds
+
+ double average_time = average_without_extremes(times_vector);
times[impl.first] = average_time;
std::cout << "Average time for " << impl.first << ": " << average_time
<< " ns\n";
}
- // Compare execution times between implementations
std::string fastest_impl;
double fastest_time = std::numeric_limits<double>::max();
for (const auto &impl_time : times) {