This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.16-ports-merge-243-gbcca089


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  bcca089526c6859e775243731037a469aec3065c (commit)
       via  99677e575504ec526546501b1fdb86221493768a (commit)
       via  400726deef57d8da8d6c7cd5c8e7891eaabab041 (commit)
       via  20a71f2c8ab45b458416422af2eec2b7bc52f66b (commit)
      from  21ad055803de5dd03606588753c46fbf8a5863b2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bcca089526c6859e775243731037a469aec3065c

commit bcca089526c6859e775243731037a469aec3065c
Author: Maxim Kuvyrkov <maxim@codesourcery.com>
Date:   Mon May 28 22:46:07 2012 -0700

    Micro-optimize critical path of strstr, strcase and memmem.

diff --git a/ChangeLog b/ChangeLog
index 4cc8e47..104147f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
 
+	* string/str-two-way.h (AVAILABLE1_USES_J): New macro, define default.
+	(two_way_short_needle): Use it.
+	* string/{strstr.c, strcasestr.c} (AVAILABLE1_USES_J): Define.
+
+2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
 	* string/str-two-way.h (two_way_short_needle): Use pointers instead of
 	array references.
 	* string/strcasestr.c (TOLOWER): Make side-effect safe.
diff --git a/string/str-two-way.h b/string/str-two-way.h
index 1b7a8ba..59609b8 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -84,6 +84,9 @@
 #ifndef RET0_IF_0
 # define RET0_IF_0(a) /* nothing */
 #endif
+#ifndef AVAILABLE1_USES_J
+# define AVAILABLE1_USES_J (1)
+#endif
 
 /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
    Return the index of the first byte in the right half, and set
@@ -295,12 +298,17 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
 	    {
 	      RET0_IF_0 (haystack_char);
+#if AVAILABLE1_USES_J
 	      ++j;
+#endif
 	      continue;
 	    }
 
-	  /* Calculate J.  */
+#if !AVAILABLE1_USES_J
+	  /* Calculate J if it wasn't kept up-to-date in the first-character
+	     loop.  */
 	  j = phaystack - &haystack[suffix] - 1;
+#endif
 
 	  /* Scan for matches in right half.  */
 	  i = suffix + 1;
@@ -497,6 +505,7 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 #undef AVAILABLE
 #undef AVAILABLE1
 #undef AVAILABLE2
+#undef AVAILABLE1_USES_J
 #undef CANON_ELEMENT
 #undef CMP_FUNC
 #undef RET0_IF_0
diff --git a/string/strcasestr.c b/string/strcasestr.c
index b6458ae..9467b7a 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -46,6 +46,7 @@
 #define AVAILABLE1(h, h_l, j, n_l) (true)
 #define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
 #define RET0_IF_0(a) if (!a) goto ret0
+#define AVAILABLE1_USES_J (0)
 #define CANON_ELEMENT(c) TOLOWER (c)
 #define CMP_FUNC(p1, p2, l)				\
   __strncasecmp ((const char *) (p1), (const char *) (p2), l)
diff --git a/string/strstr.c b/string/strstr.c
index ec2a1e9..cfed771 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -38,6 +38,7 @@
 #define AVAILABLE1(h, h_l, j, n_l) (true)
 #define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
 #define RET0_IF_0(a) if (!a) goto ret0
+#define AVAILABLE1_USES_J (0)
 #include "str-two-way.h"
 
 #undef strstr

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=99677e575504ec526546501b1fdb86221493768a

commit 99677e575504ec526546501b1fdb86221493768a
Author: Maxim Kuvyrkov <maxim@codesourcery.com>
Date:   Mon May 28 23:32:12 2012 -0700

    Use pointers for traversing arrays in strstr, strcasestr and memmem.

diff --git a/ChangeLog b/ChangeLog
index 4aeac6d..4cc8e47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
 
+	* string/str-two-way.h (two_way_short_needle): Use pointers instead of
+	array references.
+	* string/strcasestr.c (TOLOWER): Make side-effect safe.
+
+2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
 	[BZ #11607]
 	* NEWS: Add an entry.
 	* string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0): New macros,
diff --git a/string/str-two-way.h b/string/str-two-way.h
index aab627a..1b7a8ba 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -240,17 +240,24 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Scan for matches in right half.  */
 	  i = MAX (suffix, memory);
-	  while (i < needle_len && (CANON_ELEMENT (needle[i])
-				    == CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len && (CANON_ELEMENT (*pneedle++)
+				    == CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (memory < i + 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+					== CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i + 1 < memory + 1)
 		return (RETURN_TYPE) (haystack + j);
@@ -268,6 +275,7 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
     }
   else
     {
+      const unsigned char *phaystack = &haystack[suffix];
       /* The comparison always starts from needle[suffix], so cache it
 	 and use an optimized first-character loop.  */
       unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
@@ -279,23 +287,28 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
       while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
 	{
 	  unsigned char haystack_char;
+	  const unsigned char *pneedle;
 
 	  /* TODO: The first-character loop can be sped up by adapting
 	     longword-at-a-time implementation of memchr/strchr.  */
 	  if (needle_suffix
-	      != (haystack_char = CANON_ELEMENT (haystack[suffix + j])))
+	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
 	    {
 	      RET0_IF_0 (haystack_char);
 	      ++j;
 	      continue;
 	    }
 
+	  /* Calculate J.  */
+	  j = phaystack - &haystack[suffix] - 1;
+
 	  /* Scan for matches in right half.  */
 	  i = suffix + 1;
+	  pneedle = &needle[i];
 	  while (i < needle_len)
 	    {
-	      if (CANON_ELEMENT (needle[i])
-		  != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+	      if (CANON_ELEMENT (*pneedle++)
+		  != (haystack_char = CANON_ELEMENT (*phaystack++)))
 		{
 		  RET0_IF_0 (haystack_char);
 		  break;
@@ -306,10 +319,12 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
 	      while (i != SIZE_MAX)
 		{
-		  if (CANON_ELEMENT (needle[i])
-		      != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+		  if (CANON_ELEMENT (*pneedle--)
+		      != (haystack_char = CANON_ELEMENT (*phaystack--)))
 		    {
 		      RET0_IF_0 (haystack_char);
 		      break;
@@ -325,6 +340,8 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 
 	  if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
 	    break;
+
+	  phaystack = &haystack[suffix + j];
 	}
     }
  ret0: __attribute__ ((unused))
@@ -379,6 +396,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Check the last byte first; if it does not match, then
 	     shift to the next possible match location.  */
 	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
@@ -398,15 +418,19 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 	  /* Scan for matches in right half.  The last byte has
 	     already been matched, by virtue of the shift table.  */
 	  i = MAX (suffix, memory);
-	  while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+					== CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len - 1 <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (memory < i + 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+					== CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i + 1 < memory + 1)
 		return (RETURN_TYPE) (haystack + j);
@@ -431,6 +455,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Check the last byte first; if it does not match, then
 	     shift to the next possible match location.  */
 	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
@@ -442,15 +469,19 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 	  /* Scan for matches in right half.  The last byte has
 	     already been matched, by virtue of the shift table.  */
 	  i = suffix;
-	  while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+					== CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len - 1 <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
-				       == CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--)
+				       == CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i == SIZE_MAX)
 		return (RETURN_TYPE) (haystack + j);
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 184ca68..b6458ae 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -36,7 +36,7 @@
 #include <stdbool.h>
 #include <strings.h>
 
-#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+#define TOLOWER(Ch) tolower (Ch)
 
 /* Two-Way algorithm.  */
 #define RETURN_TYPE char *

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=400726deef57d8da8d6c7cd5c8e7891eaabab041

commit 400726deef57d8da8d6c7cd5c8e7891eaabab041
Author: Maxim Kuvyrkov <maxim@codesourcery.com>
Date:   Mon May 28 22:50:15 2012 -0700

    Detect EOL on-the-fly in strstr, strcasestr and memmem.

diff --git a/ChangeLog b/ChangeLog
index 7c96e02..4aeac6d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,18 @@
 2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
 
 	[BZ #11607]
+	* NEWS: Add an entry.
+	* string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0): New macros,
+	define their defaults.
+	(two_way_short_needle): Detect end-of-string on-the-fly.
+	* string/strcasestr.c, string/strstr.c (AVAILABLE): Update.
+	(AVAILABLE1, AVAILABLE2, RET0_IF_0, AVAILABLE_USES_J): Define.
+	* string/bug-strcasestr1.c: New test.
+	* string/Makefile: Run it.
+
+2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+	[BZ #11607]
 	* string/str-two-way.h (two_way_short_needle): Optimize matching of
 	the first character.
 
diff --git a/NEWS b/NEWS
index cd4f498..d37e5a5 100644
--- a/NEWS
+++ b/NEWS
@@ -9,8 +9,8 @@ Version 2.17
 
 * The following bugs are resolved with this release:
 
-  6778, 6808, 9685, 13717, 13939, 14042, 14090, 14166, 14150, 14151, 14154,
-  14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331,
+  6778, 6808, 9685, 11607, 13717, 13939, 14042, 14090, 14166, 14150, 14151,
+  14154, 14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331,
   14336, 14337, 14347, 14349
 
 * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
@@ -25,6 +25,9 @@ Version 2.17
 * SystemTap static probes have been added into the dynamic linker.
   Implemented by Gary Benson.
 
+* Optimizations of string functions strstr, strcasestr and memmem.
+  Implemented by Maxim Kuvyrkov.
+
 * The minimum Linux kernel version that this version of the GNU C Library
   can be used with is 2.6.16.
 
diff --git a/string/Makefile b/string/Makefile
index 1628b6a..a1204d9 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -56,7 +56,7 @@ tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   tst-strtok tst-strxfrm bug-strcoll1 tst-strfry	\
 		   bug-strtok1 $(addprefix test-,$(strop-tests))	\
 		   bug-envz1 tst-strxfrm2 tst-endian tst-svc2		\
-		   bug-strstr1 bug-strchr1 tst-strtok_r
+		   bug-strstr1 bug-strcasestr1 bug-strchr1 tst-strtok_r
 
 
 include ../Rules
@@ -74,6 +74,7 @@ CFLAGS-stratcliff.c = -fno-builtin
 CFLAGS-test-ffs.c = -fno-builtin
 CFLAGS-tst-inlcall.c = -fno-builtin
 CFLAGS-bug-strstr1.c = -fno-builtin
+CFLAGS-bug-strcasestr1.c = -fno-builtin
 
 ifeq ($(cross-compiling),no)
 tests: $(objpfx)tst-svc.out
diff --git a/string/bug-strcasestr1.c b/string/bug-strcasestr1.c
new file mode 100644
index 0000000..e8334d7
--- /dev/null
+++ b/string/bug-strcasestr1.c
@@ -0,0 +1,39 @@
+/* Test for non-submitted strcasestr bug.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <string.h>
+
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
+{
+  const char haystack[] = "AOKB";
+  const char needle[] = "OK";
+  const char *sub = strcasestr (haystack, needle);
+
+  if (sub == NULL)
+    {
+      fprintf (stderr, "BUG: didn't find \"%s\" in \"%s\"\n", needle, haystack);
+      return 1;
+    }
+
+  return 0;
+}
+
+#include "../test-skeleton.c"
diff --git a/string/str-two-way.h b/string/str-two-way.h
index e18dc91..aab627a 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -75,6 +75,16 @@
 # define CMP_FUNC memcmp
 #endif
 
+#ifndef AVAILABLE1
+# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#endif
+#ifndef AVAILABLE2
+# define AVAILABLE2(h, h_l, j, n_l) (1)
+#endif
+#ifndef RET0_IF_0
+# define RET0_IF_0(a) /* nothing */
+#endif
+
 /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
    Return the index of the first byte in the right half, and set
    *PERIOD to the global period of the right half.
@@ -266,37 +276,58 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 	 required, and any mismatch results in a maximal shift.  */
       period = MAX (suffix, needle_len - suffix) + 1;
       j = 0;
-      while (AVAILABLE (haystack, haystack_len, j, needle_len))
+      while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
 	{
+	  unsigned char haystack_char;
+
 	  /* TODO: The first-character loop can be sped up by adapting
 	     longword-at-a-time implementation of memchr/strchr.  */
 	  if (needle_suffix
-	      != CANON_ELEMENT (haystack[suffix + j]))
+	      != (haystack_char = CANON_ELEMENT (haystack[suffix + j])))
 	    {
+	      RET0_IF_0 (haystack_char);
 	      ++j;
 	      continue;
 	    }
 
 	  /* Scan for matches in right half.  */
 	  i = suffix + 1;
-	  while (i < needle_len && (CANON_ELEMENT (needle[i])
-				    == CANON_ELEMENT (haystack[i + j])))
-	    ++i;
+	  while (i < needle_len)
+	    {
+	      if (CANON_ELEMENT (needle[i])
+		  != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+		{
+		  RET0_IF_0 (haystack_char);
+		  break;
+		}
+	      ++i;
+	    }
 	  if (needle_len <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
-				       == CANON_ELEMENT (haystack[i + j])))
-		--i;
+	      while (i != SIZE_MAX)
+		{
+		  if (CANON_ELEMENT (needle[i])
+		      != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+		    {
+		      RET0_IF_0 (haystack_char);
+		      break;
+		    }
+		  --i;
+		}
 	      if (i == SIZE_MAX)
 		return (RETURN_TYPE) (haystack + j);
 	      j += period;
 	    }
 	  else
 	    j += i - suffix + 1;
+
+	  if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
+	    break;
 	}
     }
+ ret0: __attribute__ ((unused))
   return NULL;
 }
 
@@ -433,6 +464,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 }
 
 #undef AVAILABLE
+#undef AVAILABLE1
+#undef AVAILABLE2
 #undef CANON_ELEMENT
 #undef CMP_FUNC
+#undef RET0_IF_0
 #undef RETURN_TYPE
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 9e1bde9..184ca68 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -1,6 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994, 1996-2000, 2004, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+   Copyright (C) 1994-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -44,6 +43,9 @@
 #define AVAILABLE(h, h_l, j, n_l)			\
   (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
    && ((h_l) = (j) + (n_l)))
+#define AVAILABLE1(h, h_l, j, n_l) (true)
+#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#define RET0_IF_0(a) if (!a) goto ret0
 #define CANON_ELEMENT(c) TOLOWER (c)
 #define CMP_FUNC(p1, p2, l)				\
   __strncasecmp ((const char *) (p1), (const char *) (p2), l)
diff --git a/string/strstr.c b/string/strstr.c
index 10e6fdc..ec2a1e9 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -1,6 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
-   Free Software Foundation, Inc.
+   Copyright (C) 1994-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -36,6 +35,9 @@
 #define AVAILABLE(h, h_l, j, n_l)			\
   (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
    && ((h_l) = (j) + (n_l)))
+#define AVAILABLE1(h, h_l, j, n_l) (true)
+#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#define RET0_IF_0(a) if (!a) goto ret0
 #include "str-two-way.h"
 
 #undef strstr

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=20a71f2c8ab45b458416422af2eec2b7bc52f66b

commit 20a71f2c8ab45b458416422af2eec2b7bc52f66b
Author: Maxim Kuvyrkov <maxim@codesourcery.com>
Date:   Tue May 29 00:04:12 2012 -0700

    Optimize first-character loop of strstr, strcasestr and memmem.

diff --git a/ChangeLog b/ChangeLog
index c6a780d..7c96e02 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-08-21  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+	[BZ #11607]
+	* string/str-two-way.h (two_way_short_needle): Optimize matching of
+	the first character.
+
 2012-08-21  Roland McGrath  <roland@hack.frob.com>
 
 	* csu/elf-init.c (__libc_csu_irel): Function removed.
diff --git a/string/str-two-way.h b/string/str-two-way.h
index 22e7539..e18dc91 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -258,14 +258,27 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
     }
   else
     {
+      /* The comparison always starts from needle[suffix], so cache it
+	 and use an optimized first-character loop.  */
+      unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
+
       /* The two halves of needle are distinct; no extra memory is
 	 required, and any mismatch results in a maximal shift.  */
       period = MAX (suffix, needle_len - suffix) + 1;
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  /* TODO: The first-character loop can be sped up by adapting
+	     longword-at-a-time implementation of memchr/strchr.  */
+	  if (needle_suffix
+	      != CANON_ELEMENT (haystack[suffix + j]))
+	    {
+	      ++j;
+	      continue;
+	    }
+
 	  /* Scan for matches in right half.  */
-	  i = suffix;
+	  i = suffix + 1;
 	  while (i < needle_len && (CANON_ELEMENT (needle[i])
 				    == CANON_ELEMENT (haystack[i + j])))
 	    ++i;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                    |   30 ++++++
 NEWS                                         |    7 +-
 string/Makefile                              |    3 +-
 nptl/tst-once1.c => string/bug-strcasestr1.c |   29 ++----
 string/str-two-way.h                         |  127 ++++++++++++++++++++++----
 string/strcasestr.c                          |    9 +-
 string/strstr.c                              |    7 +-
 7 files changed, 164 insertions(+), 48 deletions(-)
 copy nptl/tst-once1.c => string/bug-strcasestr1.c (68%)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]