linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
@ 2018-07-22 17:37 Zhaoxiu Zeng
  2018-07-22 18:37 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 8+ messages in thread
From: Zhaoxiu Zeng @ 2018-07-22 17:37 UTC (permalink / raw)
  To: Andrew Morton, Matthew Wilcox, Dan Carpenter, Geert Uytterhoeven,
	Thomas Gleixner, Andrey Ryabinin, Greg Kroah-Hartman
  Cc: linux-kernel, Zhaoxiu Zeng

From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>

The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
For the Sunday algorithm, to see
	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
---
 lib/string.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 81 insertions(+), 11 deletions(-)

diff --git a/lib/string.c b/lib/string.c
index 2c0900a5d51a..ed4ab9ee3373 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -898,19 +898,51 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	size_t l1, l2;
+	size_t l2;
+	const char *pchk = s1;
 
+#ifdef __HAVE_ARCH_STRLEN
 	l2 = strlen(s2);
+#else
+	for (l2 = 0; s2[l2] != 0; l2++)
+		/* nothing */;
+#endif
 	if (!l2)
 		return (char *)s1;
-	l1 = strlen(s1);
-	while (l1 >= l2) {
-		l1--;
-		if (!memcmp(s1, s2, l2))
+
+	/*
+	 * Sunday matching
+	 */
+	while (1) {
+		size_t i;
+		char k;
+
+		/* compare */
+		for (i = 0; i < l2; i++) {
+			if (s1[i] != s2[i])
+				break;
+		}
+		if (i >= l2)
 			return (char *)s1;
-		s1++;
+
+		/* if s1 terminate early? */
+		if (pchk < s1 + i)
+			pchk = s1 + i;
+		do {
+			k = *pchk;
+			if (k == 0)
+				return NULL;
+		} while (pchk++ != s1 + l2);
+
+		/* find the k's last presence in s2 (k = s1[l2]) */
+		for (i = l2; --i != (size_t)-1; ) {
+			if (k == s2[i])
+				break;
+		}
+
+		/* shift */
+		s1 += l2 - i;
 	}
-	return NULL;
 }
 EXPORT_SYMBOL(strstr);
 #endif
@@ -925,16 +957,54 @@ EXPORT_SYMBOL(strstr);
 char *strnstr(const char *s1, const char *s2, size_t len)
 {
 	size_t l2;
+	const char *pchk = s1;
 
+#ifdef __HAVE_ARCH_STRLEN
 	l2 = strlen(s2);
+#else
+	for (l2 = 0; s2[l2] != 0; l2++)
+		/* nothing */;
+#endif
 	if (!l2)
 		return (char *)s1;
+
+	/*
+	 * Sunday matching
+	 */
 	while (len >= l2) {
-		len--;
-		if (!memcmp(s1, s2, l2))
-			return (char *)s1;
-		s1++;
+		size_t i;
+		char k;
+
+		/* compare */
+		for (i = 0; i < l2; i++) {
+			if (s1[i] != s2[i])
+				break;
+		}
+		if (i >= l2)
+			return (char *)s1;
+		if (len == l2)
+			break;
+
+		/* if s1 terminate early? */
+		if (pchk < s1 + i)
+			pchk = s1 + i;
+		do {
+			k = *pchk;
+			if (k == 0)
+				return NULL;
+		} while (pchk++ != s1 + l2);
+
+		/* find the k's last presence in s2 (k = s1[l2]) */
+		for (i = l2; --i != (size_t)-1; ) {
+			if (k == s2[i])
+				break;
+		}
+
+		/* shift */
+		s1  += l2 - i;
+		len -= l2 - i;
 	}
+
 	return NULL;
 }
 EXPORT_SYMBOL(strnstr);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-22 17:37 [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr() Zhaoxiu Zeng
@ 2018-07-22 18:37 ` Greg Kroah-Hartman
  2018-07-26 17:17   ` Zhaoxiu Zeng
  0 siblings, 1 reply; 8+ messages in thread
From: Greg Kroah-Hartman @ 2018-07-22 18:37 UTC (permalink / raw)
  To: Zhaoxiu Zeng
  Cc: Andrew Morton, Matthew Wilcox, Dan Carpenter, Geert Uytterhoeven,
	Thomas Gleixner, Andrey Ryabinin, linux-kernel, Zhaoxiu Zeng

On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
> From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
> 
> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
> For the Sunday algorithm, to see
> 	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm

So you say, but what does this really buy us?  Why make this change?
How was it tested?  What is the downside of not taking this?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-22 18:37 ` Greg Kroah-Hartman
@ 2018-07-26 17:17   ` Zhaoxiu Zeng
  2018-07-27  5:48     ` Zhaoxiu Zeng
  0 siblings, 1 reply; 8+ messages in thread
From: Zhaoxiu Zeng @ 2018-07-26 17:17 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Andrew Morton, Matthew Wilcox, Dan Carpenter, Geert Uytterhoeven,
	Thomas Gleixner, Andrey Ryabinin, linux-kernel, Zhaoxiu Zeng

在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>> From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
>>
>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>> For the Sunday algorithm, to see
>> 	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
> 
> So you say, but what does this really buy us?  Why make this change?
> How was it tested?  What is the downside of not taking this?
> 
> thanks,
> 
> greg k-h
> 

I use the following program to test on fc28.
Compile with O2, the new version is almost 2X faster than the original.
The code size of the original is 0x80, the newer is 0xB0.


#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 */
char *strstr1(const char *s1, const char *s2)
{
	size_t l1, l2;

	l2 = strlen(s2);
	if (!l2)
		return (char *)s1;
	l1 = strlen(s1);
	while (l1 >= l2) {
		l1--;
		if (!memcmp(s1, s2, l2))
			return (char *)s1;
		s1++;
	}
	return NULL;
}

char *strstr2(const char *s1, const char *s2)
{
	size_t l2;
	const char *pchk = s1;

#if 1//def __HAVE_ARCH_STRLEN
	l2 = strlen(s2);
#else
	for (l2 = 0; s2[l2] != 0; l2++)
		/* nothing */;
#endif
	if (!l2)
		return (char *)s1;

	/*
	 * Sunday matching
	 */
	while (1) {
		size_t i;
		char k;

		/* compare */
		for (i = 0; i < l2; i++) {
			if (s1[i] != s2[i])
				break;
		}
		if (i >= l2)
			return (char *)s1;

		/* if s1 terminate early? */
		if (pchk < s1 + i)
			pchk = s1 + i;
		do {
			k = *pchk;
			if (k == 0)
				return NULL;
		} while (pchk++ != s1 + l2);

		/* find the k's last presence in s2 (k = s1[l2]) */
		for (i = l2; --i != (size_t)-1; ) {
			if (k == s2[i])
				break;
		}

		/* shift */
		s1 += l2 - i;
	}
}

#ifdef __i386__
#  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
#elif __x86_64__
#  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
#else
# error unknown platform
#endif

#define RDTSC_START(cycles)                                \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("CPUID\n\t"                           \
                     "RDTSC\n\t"                           \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

#define RDTSC_STOP(cycles)                                 \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("RDTSCP\n\t"                          \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     "CPUID\n\t"                           \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

typedef unsigned long long TIME_T;
#define start_time(start)		RDTSC_START(start)
#define stop_time(stop)			RDTSC_STOP(stop)
#define diff_time(start, stop)	(stop - start)

#define MAX_HAYSTACK_LENGTH 64
#define MAX_NEEDLE_LENGTH 16
#define TEST_LOOPS 1000

char haystack[MAX_HAYSTACK_LENGTH + 1];
char needle[MAX_NEEDLE_LENGTH + 1];

int main(int argc, char **argv)
{
    size_t i, j, n;
    void *p1, *p2;

    srand(time(0));

    for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
        haystack[i] = rand();
        if (haystack[i] != 0)
            i++;
    }
    haystack[i] = 0;

    for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
        TIME_T start, stop;
        unsigned long long elapsed1, elapsed2;

        j = rand() % MAX_NEEDLE_LENGTH;
        if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
            n = i;
        else
            n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
        memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
        needle[MAX_NEEDLE_LENGTH] = 0;

        elapsed1 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p1 = strstr1(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed1 > diff_time(start, stop))
                elapsed1 = diff_time(start, stop);
        }

        elapsed2 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p2 = strstr2(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed2 > diff_time(start, stop))
                elapsed2 = diff_time(start, stop);
        }

        if (p1 != p2)
            fprintf(stderr, "Error: %p != %p\n", p1, p2);
        else
            fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
    }

    return 0;
}

The test result:
[zzx@fedora strstr]$ ./test
  0,   3:	68,	68
  1,  13:	74,	66
  2,   2:	88,	94
  3,   5:	96,	88
  4,   8:	108,	80
  5,  13:	110,	76
  6,  12:	132,	82
  7,   9:	142,	82
  8,  11:	152,	88
  9,  13:	146,	90
 10,  14:	154,	94
 11,  15:	132,	104
 12,   1:	196,	114
 13,   8:	206,	108
 14,  14:	186,	110
 15,  13:	194,	112
 16,   0:	260,	124
 17,  10:	258,	124
 18,  15:	208,	136
 19,  11:	276,	136
 20,  13:	256,	128
 21,  12:	292,	144
 22,  11:	300,	142
 23,  13:	278,	144
 24,   7:	334,	152
 25,   1:	346,	166
 26,   6:	368,	168
 27,  15:	278,	182
 28,   1:	374,	168
 29,   3:	382,	188
 30,   8:	398,	172
 31,   4:	402,	188
 32,   0:	430,	180
 33,  10:	398,	184
 34,  10:	410,	192
 35,   8:	434,	180
 36,   7:	444,	198
 37,   6:	454,	204
 38,   1:	464,	220
 39,   2:	472,	206
 40,   4:	482,	210
 41,  15:	388,	262
 42,   2:	502,	210
 43,   5:	510,	220
 44,   8:	520,	214
 45,   0:	564,	236
 46,   3:	550,	242
 47,   8:	552,	262
 48,  10:	528,	270
 49,   2:	572,	256
 50,   3:	586,	246
 51,   7:	590,	278
 52,  14:	506,	308
 53,  14:	514,	298
 54,   4:	602,	240
 55,   5:	626,	284
 56,  15:	506,	316
 57,  10:	606,	326
 58,   4:	606,	240
 59,   0:	596,	240
 60,  13:	570,	316
 61,  13:	576,	328
 62,   5:	618,	284
 63,  13:	576,	328

Thanks!


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-26 17:17   ` Zhaoxiu Zeng
@ 2018-07-27  5:48     ` Zhaoxiu Zeng
  2018-07-27 10:39       ` Andy Shevchenko
  0 siblings, 1 reply; 8+ messages in thread
From: Zhaoxiu Zeng @ 2018-07-27  5:48 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Andrew Morton, Matthew Wilcox, Dan Carpenter, Geert Uytterhoeven,
	Thomas Gleixner, Andrey Ryabinin, linux-kernel, Zhaoxiu Zeng

在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>>> From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
>>>
>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>> For the Sunday algorithm, to see
>>> 	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>
>> So you say, but what does this really buy us?  Why make this change?
>> How was it tested?  What is the downside of not taking this?
>>
>> thanks,
>>
>> greg k-h
>>
> 
> I use the following program to test on fc28.
> Compile with O2, the new version is almost 2X faster than the original.
> The code size of the original is 0x80, the newer is 0xB0.
> 
> 
> #include <stdio.h>
> #include <stdlib.h>
> #include <stdint.h>
> #include <string.h>
> #include <time.h>
> #include <unistd.h>
> 
> /**
>  * strstr - Find the first substring in a %NUL terminated string
>  * @s1: The string to be searched
>  * @s2: The string to search for
>  */
> char *strstr1(const char *s1, const char *s2)
> {
> 	size_t l1, l2;
> 
> 	l2 = strlen(s2);
> 	if (!l2)
> 		return (char *)s1;
> 	l1 = strlen(s1);
> 	while (l1 >= l2) {
> 		l1--;
> 		if (!memcmp(s1, s2, l2))
> 			return (char *)s1;
> 		s1++;
> 	}
> 	return NULL;
> }
> 
> char *strstr2(const char *s1, const char *s2)
> {
> 	size_t l2;
> 	const char *pchk = s1;
> 
> #if 1//def __HAVE_ARCH_STRLEN
> 	l2 = strlen(s2);
> #else
> 	for (l2 = 0; s2[l2] != 0; l2++)
> 		/* nothing */;
> #endif
> 	if (!l2)
> 		return (char *)s1;
> 
> 	/*
> 	 * Sunday matching
> 	 */
> 	while (1) {
> 		size_t i;
> 		char k;
> 
> 		/* compare */
> 		for (i = 0; i < l2; i++) {
> 			if (s1[i] != s2[i])
> 				break;
> 		}
> 		if (i >= l2)
> 			return (char *)s1;
> 
> 		/* if s1 terminate early? */
> 		if (pchk < s1 + i)
> 			pchk = s1 + i;
> 		do {
> 			k = *pchk;
> 			if (k == 0)
> 				return NULL;
> 		} while (pchk++ != s1 + l2);
> 
> 		/* find the k's last presence in s2 (k = s1[l2]) */
> 		for (i = l2; --i != (size_t)-1; ) {
> 			if (k == s2[i])
> 				break;
> 		}
> 
> 		/* shift */
> 		s1 += l2 - i;
> 	}
> }
> 
> #ifdef __i386__
> #  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
> #elif __x86_64__
> #  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
> #else
> # error unknown platform
> #endif
> 
> #define RDTSC_START(cycles)                                \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("CPUID\n\t"                           \
>                      "RDTSC\n\t"                           \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> #define RDTSC_STOP(cycles)                                 \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("RDTSCP\n\t"                          \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      "CPUID\n\t"                           \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> typedef unsigned long long TIME_T;
> #define start_time(start)		RDTSC_START(start)
> #define stop_time(stop)			RDTSC_STOP(stop)
> #define diff_time(start, stop)	(stop - start)
> 
> #define MAX_HAYSTACK_LENGTH 64
> #define MAX_NEEDLE_LENGTH 16
> #define TEST_LOOPS 1000
> 
> char haystack[MAX_HAYSTACK_LENGTH + 1];
> char needle[MAX_NEEDLE_LENGTH + 1];
> 
> int main(int argc, char **argv)
> {
>     size_t i, j, n;
>     void *p1, *p2;
> 
>     srand(time(0));
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
>         haystack[i] = rand();
>         if (haystack[i] != 0)
>             i++;
>     }
>     haystack[i] = 0;
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
>         TIME_T start, stop;
>         unsigned long long elapsed1, elapsed2;
> 
>         j = rand() % MAX_NEEDLE_LENGTH;
>         if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
>             n = i;
>         else
>             n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
>         memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
>         needle[MAX_NEEDLE_LENGTH] = 0;
> 
>         elapsed1 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p1 = strstr1(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed1 > diff_time(start, stop))
>                 elapsed1 = diff_time(start, stop);
>         }
> 
>         elapsed2 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p2 = strstr2(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed2 > diff_time(start, stop))
>                 elapsed2 = diff_time(start, stop);
>         }
> 
>         if (p1 != p2)
>             fprintf(stderr, "Error: %p != %p\n", p1, p2);
>         else
>             fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
>     }
> 
>     return 0;
> }
> 
> The test result:
> [zzx@fedora strstr]$ ./test
>   0,   3:	68,	68
>   1,  13:	74,	66
>   2,   2:	88,	94
>   3,   5:	96,	88
>   4,   8:	108,	80
>   5,  13:	110,	76
>   6,  12:	132,	82
>   7,   9:	142,	82
>   8,  11:	152,	88
>   9,  13:	146,	90
>  10,  14:	154,	94
>  11,  15:	132,	104
>  12,   1:	196,	114
>  13,   8:	206,	108
>  14,  14:	186,	110
>  15,  13:	194,	112
>  16,   0:	260,	124
>  17,  10:	258,	124
>  18,  15:	208,	136
>  19,  11:	276,	136
>  20,  13:	256,	128
>  21,  12:	292,	144
>  22,  11:	300,	142
>  23,  13:	278,	144
>  24,   7:	334,	152
>  25,   1:	346,	166
>  26,   6:	368,	168
>  27,  15:	278,	182
>  28,   1:	374,	168
>  29,   3:	382,	188
>  30,   8:	398,	172
>  31,   4:	402,	188
>  32,   0:	430,	180
>  33,  10:	398,	184
>  34,  10:	410,	192
>  35,   8:	434,	180
>  36,   7:	444,	198
>  37,   6:	454,	204
>  38,   1:	464,	220
>  39,   2:	472,	206
>  40,   4:	482,	210
>  41,  15:	388,	262
>  42,   2:	502,	210
>  43,   5:	510,	220
>  44,   8:	520,	214
>  45,   0:	564,	236
>  46,   3:	550,	242
>  47,   8:	552,	262
>  48,  10:	528,	270
>  49,   2:	572,	256
>  50,   3:	586,	246
>  51,   7:	590,	278
>  52,  14:	506,	308
>  53,  14:	514,	298
>  54,   4:	602,	240
>  55,   5:	626,	284
>  56,  15:	506,	316
>  57,  10:	606,	326
>  58,   4:	606,	240
>  59,   0:	596,	240
>  60,  13:	570,	316
>  61,  13:	576,	328
>  62,   5:	618,	284
>  63,  13:	576,	328
> 
> Thanks!
> 

The original strnstr might has a bug too!
For example, assume s1 is "123\0abc...." and s2 is "abc\0",
call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-27  5:48     ` Zhaoxiu Zeng
@ 2018-07-27 10:39       ` Andy Shevchenko
  2018-07-28 14:02         ` Zhaoxiu Zeng
  0 siblings, 1 reply; 8+ messages in thread
From: Andy Shevchenko @ 2018-07-27 10:39 UTC (permalink / raw)
  To: Zhaoxiu Zeng
  Cc: Greg Kroah-Hartman, Andrew Morton, Matthew Wilcox, Dan Carpenter,
	Geert Uytterhoeven, Thomas Gleixner, Andrey Ryabinin,
	Linux Kernel Mailing List, Zhaoxiu Zeng

On Fri, Jul 27, 2018 at 8:48 AM, Zhaoxiu Zeng <zengzhaoxiu@163.com> wrote:
> 在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
>> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:

>>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>>> For the Sunday algorithm, to see
>>>>     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>>
>>> So you say, but what does this really buy us?  Why make this change?
>>> How was it tested?  What is the downside of not taking this?

>> I use the following program to test on fc28.
>> Compile with O2, the new version is almost 2X faster than the original.

>> The code size of the original is 0x80, the newer is 0xB0.

So, output of bloat-o-meter would be good to have in commit message.

>> The test result:

Compact performance statistics as well.

>> Thanks!

> The original strnstr might has a bug too!
> For example, assume s1 is "123\0abc...." and s2 is "abc\0",
> call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.

If there is a bug, send another patch to fix the bug first.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-27 10:39       ` Andy Shevchenko
@ 2018-07-28 14:02         ` Zhaoxiu Zeng
  2018-07-28 14:38           ` Greg Kroah-Hartman
  0 siblings, 1 reply; 8+ messages in thread
From: Zhaoxiu Zeng @ 2018-07-28 14:02 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Greg Kroah-Hartman, Andrew Morton, Matthew Wilcox, Dan Carpenter,
	Geert Uytterhoeven, Thomas Gleixner, Andrey Ryabinin,
	Linux Kernel Mailing List, Zhaoxiu Zeng

在 2018/7/27 18:39, Andy Shevchenko 写道:
> On Fri, Jul 27, 2018 at 8:48 AM, Zhaoxiu Zeng <zengzhaoxiu@163.com> wrote:
>> 在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
>>> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>>>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
> 
>>>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>>>> For the Sunday algorithm, to see
>>>>>     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>>>
>>>> So you say, but what does this really buy us?  Why make this change?
>>>> How was it tested?  What is the downside of not taking this?
> 
>>> I use the following program to test on fc28.
>>> Compile with O2, the new version is almost 2X faster than the original.
> 
>>> The code size of the original is 0x80, the newer is 0xB0.
> 
> So, output of bloat-o-meter would be good to have in commit message.
> 
>>> The test result:
> 
> Compact performance statistics as well.
> 
>>> Thanks!
> 
>> The original strnstr might has a bug too!
>> For example, assume s1 is "123\0abc...." and s2 is "abc\0",
>> call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.
> 
> If there is a bug, send another patch to fix the bug first.
> 

The bug could be fixed by this patch.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-28 14:02         ` Zhaoxiu Zeng
@ 2018-07-28 14:38           ` Greg Kroah-Hartman
  2018-07-29  7:37             ` Zhaoxiu Zeng
  0 siblings, 1 reply; 8+ messages in thread
From: Greg Kroah-Hartman @ 2018-07-28 14:38 UTC (permalink / raw)
  To: Zhaoxiu Zeng
  Cc: Andy Shevchenko, Andrew Morton, Matthew Wilcox, Dan Carpenter,
	Geert Uytterhoeven, Thomas Gleixner, Andrey Ryabinin,
	Linux Kernel Mailing List, Zhaoxiu Zeng

On Sat, Jul 28, 2018 at 10:02:51PM +0800, Zhaoxiu Zeng wrote:
> 在 2018/7/27 18:39, Andy Shevchenko 写道:
> > On Fri, Jul 27, 2018 at 8:48 AM, Zhaoxiu Zeng <zengzhaoxiu@163.com> wrote:
> >> 在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
> >>> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
> >>>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
> > 
> >>>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
> >>>>> For the Sunday algorithm, to see
> >>>>>     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
> >>>>
> >>>> So you say, but what does this really buy us?  Why make this change?
> >>>> How was it tested?  What is the downside of not taking this?
> > 
> >>> I use the following program to test on fc28.
> >>> Compile with O2, the new version is almost 2X faster than the original.
> > 
> >>> The code size of the original is 0x80, the newer is 0xB0.
> > 
> > So, output of bloat-o-meter would be good to have in commit message.
> > 
> >>> The test result:
> > 
> > Compact performance statistics as well.
> > 
> >>> Thanks!
> > 
> >> The original strnstr might has a bug too!
> >> For example, assume s1 is "123\0abc...." and s2 is "abc\0",
> >> call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.
> > 
> > If there is a bug, send another patch to fix the bug first.
> > 
> 
> The bug could be fixed by this patch.

Given that there doesn't seem to be a good reason to take your patch
yet, that might be hard :)

You need to convince us that the patch is a valid thing to accept, by
writing a correct changelog and showing proof of its correctness as this
is modifying a core function in the kernel.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
  2018-07-28 14:38           ` Greg Kroah-Hartman
@ 2018-07-29  7:37             ` Zhaoxiu Zeng
  0 siblings, 0 replies; 8+ messages in thread
From: Zhaoxiu Zeng @ 2018-07-29  7:37 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Andy Shevchenko, Andrew Morton, Matthew Wilcox, Dan Carpenter,
	Geert Uytterhoeven, Thomas Gleixner, Andrey Ryabinin,
	Linux Kernel Mailing List, Zhaoxiu Zeng

在 2018/7/28 22:38, Greg Kroah-Hartman 写道:
> On Sat, Jul 28, 2018 at 10:02:51PM +0800, Zhaoxiu Zeng wrote:
>> 在 2018/7/27 18:39, Andy Shevchenko 写道:
>>> On Fri, Jul 27, 2018 at 8:48 AM, Zhaoxiu Zeng <zengzhaoxiu@163.com> wrote:
>>>> 在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
>>>>> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>>>>>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>>>
>>>>>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>>>>>> For the Sunday algorithm, to see
>>>>>>>     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>>>>>
>>>>>> So you say, but what does this really buy us?  Why make this change?
>>>>>> How was it tested?  What is the downside of not taking this?
>>>
>>>>> I use the following program to test on fc28.
>>>>> Compile with O2, the new version is almost 2X faster than the original.
>>>
>>>>> The code size of the original is 0x80, the newer is 0xB0.
>>>
>>> So, output of bloat-o-meter would be good to have in commit message.
>>>
>>>>> The test result:
>>>
>>> Compact performance statistics as well.
>>>
>>>>> Thanks!
>>>
>>>> The original strnstr might has a bug too!
>>>> For example, assume s1 is "123\0abc...." and s2 is "abc\0",
>>>> call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.
>>>
>>> If there is a bug, send another patch to fix the bug first.
>>>
>>
>> The bug could be fixed by this patch.
> 
> Given that there doesn't seem to be a good reason to take your patch
> yet, that might be hard :)
> 
> You need to convince us that the patch is a valid thing to accept, by
> writing a correct changelog and showing proof of its correctness as this
> is modifying a core function in the kernel.
> 
> thanks,
> 
> greg k-h
> 

Thanks for responses!
I wrote a new changelog as described below. If it's ok, I will commit a new patch.
The details are as follow:


The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
For the Sunday algorithm, to see
	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm

The new strstr implementation steps:
	1) Gets the pattern length, and is denoted as l2.
	2) Compares the first l2 bytes of the text with the pattern.
	   If the comparison has matched, returns the pointer to starting of the text.
	   Else, gets the index of the symbol which causes a mismatch, and is denoted as i.
	3) Scans the text from index i to l2, to test whether the text is terminated.
	   If the text is terminated early, returns NULL.
	   Else, gets the symbol at index l2, and is denoted K.
	4) Finds the index of K's last presence in the pattern, and is denoted as j.
           If there is no occurence of K in the pattern, j is -1.
	5) Shifts the text window "l2 - j" bytes, and repeats step 2.

The original strstr scans the text two times. The 1st time gets the text length,
the 2nd time does the matches. The new strstr scans the text only one time.

The original strnstr has a bug. If the text length is smaller than the argument "len",
it maybe returns a wrong result. The bug could be fixed by this patch too.

I have tested using the following program on fc28.x86_64.
When compile with O2, the new version is almost 2X faster than the original,
the code size of the original strstr is 0x80 and the newer is 0xB0.


#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 */
char *strstr1(const char *s1, const char *s2)
{
	size_t l1, l2;

	l2 = strlen(s2);
	if (!l2)
		return (char *)s1;
	l1 = strlen(s1);
	while (l1 >= l2) {
		l1--;
		if (!memcmp(s1, s2, l2))
			return (char *)s1;
		s1++;
	}
	return NULL;
}

char *strstr2(const char *s1, const char *s2)
{
	size_t l2;
	const char *pchk = s1;

#if 1//def __HAVE_ARCH_STRLEN
	l2 = strlen(s2);
#else
	for (l2 = 0; s2[l2] != 0; l2++)
		/* nothing */;
#endif
	if (!l2)
		return (char *)s1;

	/*
	 * Sunday matching
	 */
	while (1) {
		size_t i;
		char k;

		/* compare */
		for (i = 0; i < l2; i++) {
			if (s1[i] != s2[i])
				break;
		}
		if (i >= l2)
			return (char *)s1;

		/* if s1 terminate early? */
		if (pchk < s1 + i)
			pchk = s1 + i;
		do {
			k = *pchk;
			if (k == 0)
				return NULL;
		} while (pchk++ != s1 + l2);

		/* find the k's last presence in s2 (k = s1[l2]) */
		for (i = l2; --i != (size_t)-1; ) {
			if (k == s2[i])
				break;
		}

		/* shift */
		s1 += l2 - i;
	}
}

#ifdef __i386__
#  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
#elif __x86_64__
#  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
#else
# error unknown platform
#endif

#define RDTSC_START(cycles)                                \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("CPUID\n\t"                           \
                     "RDTSC\n\t"                           \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

#define RDTSC_STOP(cycles)                                 \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("RDTSCP\n\t"                          \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     "CPUID\n\t"                           \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

typedef unsigned long long TIME_T;
#define start_time(start)		RDTSC_START(start)
#define stop_time(stop)			RDTSC_STOP(stop)
#define diff_time(start, stop)	(stop - start)

#define MAX_HAYSTACK_LENGTH 64
#define MAX_NEEDLE_LENGTH 16
#define TEST_LOOPS 1000

char haystack[MAX_HAYSTACK_LENGTH + 1];
char needle[MAX_NEEDLE_LENGTH + 1];

int main(int argc, char **argv)
{
    size_t i, j, n;
    void *p1, *p2;

    srand(time(0));

    for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
        haystack[i] = rand();
        if (haystack[i] != 0)
            i++;
    }
    haystack[i] = 0;

    for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
        TIME_T start, stop;
        unsigned long long elapsed1, elapsed2;

        j = rand() % MAX_NEEDLE_LENGTH;
        if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
            n = i;
        else
            n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
        memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
        needle[MAX_NEEDLE_LENGTH] = 0;

        elapsed1 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p1 = strstr1(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed1 > diff_time(start, stop))
                elapsed1 = diff_time(start, stop);
        }

        elapsed2 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p2 = strstr2(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed2 > diff_time(start, stop))
                elapsed2 = diff_time(start, stop);
        }

        if (p1 != p2)
            fprintf(stderr, "Error: %p != %p\n", p1, p2);
        else
            fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
    }

    return 0;
}


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-07-29  7:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-22 17:37 [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr() Zhaoxiu Zeng
2018-07-22 18:37 ` Greg Kroah-Hartman
2018-07-26 17:17   ` Zhaoxiu Zeng
2018-07-27  5:48     ` Zhaoxiu Zeng
2018-07-27 10:39       ` Andy Shevchenko
2018-07-28 14:02         ` Zhaoxiu Zeng
2018-07-28 14:38           ` Greg Kroah-Hartman
2018-07-29  7:37             ` Zhaoxiu Zeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).