diff options
-rw-r--r-- | src/include/ndpi_api.h | 16 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 66 | ||||
-rw-r--r-- | tests/performance/strnstr.cpp | 96 |
3 files changed, 114 insertions, 64 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index ca365b8fe..fa1592863 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -115,17 +115,17 @@ extern "C" { u_int32_t ndpi_get_tot_allocated_memory(void); /** - * Search the first occurrence of substring -find- in -s- - * The search is limited to the first -slen- characters of the string + * Finds the first occurrence of the substring 'needle' in the string 'haystack'. * - * @par s = string to parse - * @par find = string to match with -s- - * @par slen = max length to match between -s- and -find- - * @return a pointer to the beginning of the located substring; - * NULL if the substring is not found + * This function is similar to the standard `strstr()` function, but it has an additional parameter `len` that + * specifies the maximum length of the search. * + * @param haystack The string to search in. + * @param needle The substring to search for. + * @param len The maximum length of the search. + * @return Pointer to the first occurrence of 'needle' in 'haystack', or NULL if no match is found. */ - char* ndpi_strnstr(const char *s, const char *find, size_t slen); + char *ndpi_strnstr(const char *haystack, const char *needle, size_t len); /** * Same as ndpi_strnstr but case insensitive diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index d7eb5c282..c8675ecfc 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -9610,32 +9610,52 @@ void ndpi_dump_risks_score(FILE *risk_out) { /* ****************************************************** */ -/* - * Find the first occurrence of find in s, where the search is limited to the - * first slen characters of s. - */ -char *ndpi_strnstr(const char *s, const char *find, size_t slen) { - char c; - size_t len; +char *ndpi_strnstr(const char *haystack, const char *needle, size_t len) +{ + if (!haystack || !needle || len == 0) + { + return NULL; + } + + size_t needle_len = strlen(needle); + size_t hs_real_len = strnlen(haystack, len); - if(s == NULL || find == NULL || slen == 0) + if (needle_len == 0) + { + return (char *)haystack; + } + + if (needle_len > hs_real_len) + { return NULL; + } + + if (needle_len == 1) + { + return (char *)memchr(haystack, *needle, hs_real_len); + } + + const char *current = haystack; + const char *haystack_end = haystack + hs_real_len; + + while (current <= haystack_end - needle_len) + { + current = (const char *)memchr(current, *needle, haystack_end - current); + + if (!current) + { + return NULL; + } - if((c = *find++) != '\0') { - len = strnlen(find, slen); - do { - char sc; - - do { - if(slen-- < 1 || (sc = *s++) == '\0') - return(NULL); - } while(sc != c); - if(len > slen) - return(NULL); - } while(strncmp(s, find, len) != 0); - s--; - } - return((char *) s); + if ((current + needle_len <= haystack_end) && memcmp(current, needle, needle_len) == 0) + { + return (char *)current; + } + + current++; + } + + return NULL; } /* ****************************************************** */ diff --git a/tests/performance/strnstr.cpp b/tests/performance/strnstr.cpp index 84922150a..7c3b3ba7d 100644 --- a/tests/performance/strnstr.cpp +++ b/tests/performance/strnstr.cpp @@ -1,13 +1,13 @@ #include <algorithm> #include <chrono> -#include <cmath> #include <cstring> #include <functional> -#include <iomanip> #include <iostream> +#include <limits> #include <map> #include <random> #include <string> +#include <tuple> #include <vector> char *ndpi_strnstr(const char *s, const char *find, size_t slen) { @@ -30,40 +30,42 @@ char *ndpi_strnstr(const char *s, const char *find, size_t slen) { return ((char *)s); } -char *ndpi_strnstr_opt(const char *s, const char *find, size_t slen) { - if (s == NULL || find == NULL || slen == 0) { +char *ndpi_strnstr_opt(const char *haystack, const char *needle, size_t len) { + if (!haystack || !needle || len == 0) { return NULL; } - char c = *find; + size_t needle_len = strlen(needle); + size_t hs_real_len = strnlen(haystack, len); - if (c == '\0') { - return (char *)s; + if (needle_len == 0) { + return (char *)haystack; } - if (*(find + 1) == '\0') { - return (char *)memchr(s, c, slen); - } - - size_t find_len = strnlen(find, slen); - - if (find_len > slen) { + if (needle_len > hs_real_len) { return NULL; } - const char *end = s + slen - find_len; + if (needle_len == 1) { + return (char *)memchr(haystack, *needle, hs_real_len); + } - while (s <= end) { - if (memcmp(s, find, find_len) == 0) { - return (char *)s; - } + const char *current = haystack; + const char *haystack_end = haystack + hs_real_len; - size_t remaining_length = end - s; - s = (char *)memchr(s + 1, c, remaining_length); + while (current <= haystack_end - needle_len) { + current = (const char *)memchr(current, *needle, haystack_end - current); - if (s == NULL || s > end) { + if (!current) { return NULL; } + + if ((current + needle_len <= haystack_end) && + memcmp(current, needle, needle_len) == 0) { + return (char *)current; + } + + current++; } return NULL; @@ -80,10 +82,9 @@ std::string random_string(size_t length, std::mt19937 &gen) { double measure_time(const std::function<char *(const char *, const char *, size_t)> &strnstr_impl, - const std::string &haystack, const std::string &needle, - std::mt19937 &gen) { + const std::string &haystack, const std::string &needle) { auto start = std::chrono::high_resolution_clock::now(); - // Call the function to prevent optimization + volatile auto result = strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size()); auto end = std::chrono::high_resolution_clock::now(); @@ -92,6 +93,31 @@ double measure_time(const std::function<char *(const char *, const char *, .count(); } +void warm_up(const std::function<char *(const char *, const char *, size_t)> + &strnstr_impl, + const std::string &haystack, const std::string &needle, + int iterations) { + for (int i = 0; i < iterations; i++) { + volatile auto result = + strnstr_impl(haystack.c_str(), needle.c_str(), haystack.size()); + } +} + +double average_without_extremes(const std::vector<double> ×) { + if (times.size() < 5) { + return std::accumulate(times.begin(), times.end(), 0.0) / + static_cast<double>(times.size()); + } + + auto sorted_times = times; + std::sort(sorted_times.begin(), sorted_times.end()); + sorted_times.erase(sorted_times.begin()); + sorted_times.pop_back(); + + return std::accumulate(sorted_times.begin(), sorted_times.end(), 0.0) / + sorted_times.size(); +} + int main() { std::ios_base::sync_with_stdio(false); std::mt19937 gen(std::random_device{}()); @@ -105,10 +131,13 @@ int main() { const std::vector<std::pair< std::string, std::function<char *(const char *, const char *, size_t)>>> strnstr_impls = { - {"ndpi_strnstr", ndpi_strnstr}, {"ndpi_strnstr_opt", ndpi_strnstr_opt} - // Add other implementations for comparison here + {"ndpi_strnstr", ndpi_strnstr}, + {"ndpi_strnstr_opt", ndpi_strnstr_opt}, }; + const int iterations = 100000; + const int warm_up_iterations = 1000; + for (size_t haystack_len : haystack_lengths) { for (size_t needle_len : needle_lengths) { std::cout << "\nTest case - Haystack length: " << haystack_len @@ -120,19 +149,20 @@ int main() { std::map<std::string, double> times; for (const auto &impl : strnstr_impls) { - double time_sum = 0.0; - for (int i = 0; i < 100000; i++) { - time_sum += measure_time(impl.second, haystack, needle, gen); + warm_up(impl.second, haystack, needle, warm_up_iterations); + + std::vector<double> times_vector; + for (int i = 0; i < iterations; i++) { + times_vector.push_back(measure_time(impl.second, haystack, needle)); } - double average_time = - time_sum / 100000.0; // Average time in nanoseconds + + double average_time = average_without_extremes(times_vector); times[impl.first] = average_time; std::cout << "Average time for " << impl.first << ": " << average_time << " ns\n"; } - // Compare execution times between implementations std::string fastest_impl; double fastest_time = std::numeric_limits<double>::max(); for (const auto &impl_time : times) { |