This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Remove some string2.h optimizations for gcc 3.0+


Hi!

I was surprised by the code generated for foo = stpcpy(foo, "./") on ia64,
so I ran the stringops benchmark I posted here some months ago on
i686/ia64/alpha/sparc/sparc64.
On all !_STRING_ARCH_unaligned platforms (ia64/alpha/sparc*), all of
mempcpy/strcpy/stpcpy were way slower with the header "optimizations" enabled
and generated much larger code. Here are e.g. the ia64 results (legend as before;
times are in ticks; gcc is without bits/string*.h opts; nob is likewise, but with
-fno-builtin too; str is with -D__USE_STRING_INLINES; str2 is just the string2.h
opts (without any options); * means the time without any bits/string*.h opts is
faster than or as fast as the others):
                                                         gcc       nob       str      str2
mempcpy
                                           (0,a,1)        45        45       280       276 *
                                           (0,a,2)        48        48       292       290 *
                                          (0,ab,3)        51        51       276       276 *
                                         (0,abc,4)        54        54       299       299 *
                                        (0,abcd,5)        57        57       308       311 *
                                       (0,abcde,6)        60        60       324       332 *
                                      (0,abcdef,7)        63        63       305       306 *
                                     (0,abcdefg,8)        66        66       322       322 *
                             (0,quitelongstring,0)        34        35       155       155 *
                             (0,quitelongstring,1)        50        45       287       283 *
                             (0,quitelongstring,2)        56        48       295       296 *
                             (0,quitelongstring,3)        63        51       302       303 *
                             (0,quitelongstring,4)        70        54       303       299 *
                             (0,quitelongstring,5)        77        57       305       302 *
                             (0,quitelongstring,6)        84        60       307       302 *
                             (0,quitelongstring,7)        91        63       307       310 *
                             (0,quitelongstring,8)        98        66       317       322 *
                                           (5,a,1)        48        48       274       277 *
                                           (5,a,2)        51        51       304       291 *
                                          (5,ab,3)        54        54       275       276 *
                                         (5,abc,4)        57        57       303       306 *
                                        (5,abcd,5)        60        60       306       315 *
                                       (5,abcde,6)        63        63       328       328 *
                                      (5,abcdef,7)        66        66       304       304 *
                                     (5,abcdefg,8)        69        69       323       320 *
                             (5,quitelongstring,0)        37        37       160       153 *
                             (5,quitelongstring,1)        52        48       285       287 *
                             (5,quitelongstring,2)        59        51       299       298 *
                             (5,quitelongstring,3)        66        54       302       296 *
                             (5,quitelongstring,4)        73        57       303       299 *
                             (5,quitelongstring,5)        80        60       304       306 *
                             (5,quitelongstring,6)        87        63       303       301 *
                             (5,quitelongstring,7)        94        67       313       306 *
                             (5,quitelongstring,8)       101        69       320       320 *

stpcpy
                                           (0,NUL)        33        34       286       290 *
                                             (0,a)        40        40       280       280 *
                                            (0,ab)        47        47       301       312 *
                                           (0,abc)        54        54       336       338 *
                                          (0,abcd)        61        61       315       323 *
                                         (0,abcde)       122        72       312       323 *
                                        (0,abcdef)       138        75       305       312 *
                                       (0,abcdefg)       154        82       325       326 *
                                           (5,NUL)        36        36       288       291 *
                                             (5,a)        43        43       277       285 *
                                            (5,ab)        50        58       306       317 *
                                           (5,abc)        57        57       338       336 *
                                          (5,abcd)        64        64       319       323 *
                                         (5,abcde)       125        71       302       313 *
                                        (5,abcdef)       145        78       309       316 *
                                       (5,abcdefg)       157        85       319       320 *

strcpy
                                           (0,NUL)         4        51       300       290 *
                                             (0,a)        14        54       274       292 *
                                            (0,ab)         5        66       308       308 *
                                           (0,abc)         4        69       330       338 *
                                          (0,abcd)         5        72       310       335 *
                                         (0,abcde)         5        75       314       318 *
                                        (0,abcdef)         7        78       309       313 *
                                       (0,abcdefg)         4        81       322       331 *
                                           (5,NUL)         6        49       288       285 *
                                             (5,a)         7        53       279       277 *
                                            (5,ab)         8        59       304       317 *
                                           (5,abc)         9        90       330       335 *
                                          (5,abcd)        10        93       302       319 *
                                         (5,abcde)        11        96       302       305 *
                                        (5,abcdef)        12        99       307       315 *
                                       (5,abcdefg)        13       111       319       321 *

The resulting tester binary was 553221 bytes.
With the patch below:

mempcpy
                                           (0,a,1)        45        45        14        14
                                           (0,a,2)        48        48         4         4
                                          (0,ab,3)        51        51         7         7
                                         (0,abc,4)        54        54         4         4
                                        (0,abcd,5)        57        57         7         7
                                       (0,abcde,6)        60        60         7         7
                                      (0,abcdef,7)        63        63         7         7
                                     (0,abcdefg,8)        66        66         4         4
                             (0,quitelongstring,0)        35        35         1         1
                             (0,quitelongstring,1)        45        45         4         4
                             (0,quitelongstring,2)        49        48        14        14
                             (0,quitelongstring,3)        51        51         7         7
                             (0,quitelongstring,4)        54        54         4         4
                             (0,quitelongstring,5)        57        57         7         7
                             (0,quitelongstring,6)        60        60         7         7
                             (0,quitelongstring,7)        63        63         7         7
                             (0,quitelongstring,8)        66        66         4         4
                                           (5,a,1)        48        48         6         6
                                           (5,a,2)        51        51         7         7
                                          (5,ab,3)        54        54         8         8
                                         (5,abc,4)        57        57         9         9
                                        (5,abcd,5)        60        60        10        10
                                       (5,abcde,6)        63        63        11        11
                                      (5,abcdef,7)        66        66        12        12
                                     (5,abcdefg,8)        69        69        13        13
                             (5,quitelongstring,0)        37        37         4         4
                             (5,quitelongstring,1)        48        48         6         6
                             (5,quitelongstring,2)        51        51         7         7
                             (5,quitelongstring,3)        54        54         8         8
                             (5,quitelongstring,4)        57        57         9         9
                             (5,quitelongstring,5)        60        60        10        10
                             (5,quitelongstring,6)        63        63        11        11
                             (5,quitelongstring,7)        66        66        12        12
                             (5,quitelongstring,8)        69        69        13        13

stpcpy
                                           (0,NUL)        33        33        14        14
                                             (0,a)        40        40         4         4
                                            (0,ab)        47        47         5         5
                                           (0,abc)        54        54         4         4
                                          (0,abcd)        61        61         5         5
                                         (0,abcde)        68        68         7        11
                                        (0,abcdef)        75        75         5         5
                                       (0,abcdefg)        82        82         4         4
                                           (5,NUL)        36        36         6         6
                                             (5,a)        43        43         7         7
                                            (5,ab)        50        50         8         8
                                           (5,abc)        57        57         9         9
                                          (5,abcd)        64        64        10        10
                                         (5,abcde)        71        71        11        11
                                        (5,abcdef)        78        78        12        12
                                       (5,abcdefg)        85        85        13        13

strcpy
                                           (0,NUL)         4        55        14        14 *
                                             (0,a)        14        58         4         4
                                            (0,ab)         5        70         5         5 *
                                           (0,abc)         8        80         4         4
                                          (0,abcd)         5        76         5         5 *
                                         (0,abcde)         5        79         5         5 *
                                        (0,abcdef)         7        82         7         7 *
                                       (0,abcdefg)         4        85         4         4 *
                                           (5,NUL)         6        53         6         6 *
                                             (5,a)         7        57         7         7 *
                                            (5,ab)         8        63         8         8 *
                                           (5,abc)         9        94         9         9 *
                                          (5,abcd)        10        97        10        10 *
                                         (5,abcde)        11       100        11        11 *
                                        (5,abcdef)        12       103        12        12 *
                                       (5,abcdefg)        13       115        13        13 *

and the resulting tester binary was 325285 bytes long (i.e. ~230KB shorter).
The results are similar on alpha and sparc.
On i686, it usually generates exactly the same code, and sometimes code that is a bit faster.
But the important thing is that the compiler can know more than the header does.
Not to mention that it speeds up compilation...

2002-01-23  Jakub Jelinek  <jakub@redhat.com>

	* string/bits/string2.h (__mempcpy): For gcc 3.0+, don't use
	__mempcpy_small but instead use __builtin_memcpy ( , , n) + n for
	short lengths and constant src.
	(strcpy): Don't optimize for gcc 3.0+.
	(__stpcpy): For gcc 3.0+, don't use
	__stpcpy_small but instead use __builtin_strcpy (, src) + strlen (src)
	for short string literal src.

--- libc/string/bits/string2.h.jj	Wed Nov 21 13:32:37 2001
+++ libc/string/bits/string2.h	Wed Jan 23 20:09:05 2002
@@ -1,5 +1,5 @@
 /* Machine-independant string function optimizations.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1997,1998,1999,2000,2001,2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -198,26 +198,35 @@ __STRING2_COPY_TYPE (8);
 #ifdef __USE_GNU
 # if !defined _HAVE_STRING_ARCH_mempcpy || defined _FORCE_INLINES
 #  ifndef _HAVE_STRING_ARCH_mempcpy
-#   define __mempcpy(dest, src, n) \
+#   if __GNUC_PREREQ (3, 0)
+#    define __mempcpy(dest, src, n) \
+  (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n)      \
+		  && __string2_1bptr_p (src) && n <= 8			      \
+		  ? __builtin_memcpy (dest, src, n) + n			      \
+		  : __mempcpy (dest, src, n)))
+#   else
+#    define __mempcpy(dest, src, n) \
   (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n)      \
 		  && __string2_1bptr_p (src) && n <= 8			      \
 		  ? __mempcpy_small (dest, __mempcpy_args (src), n)	      \
 		  : __mempcpy (dest, src, n)))
+#   endif
 /* In glibc we use this function frequently but for namespace reasons
    we have to use the name `__mempcpy'.  */
 #   define mempcpy(dest, src, n) __mempcpy (dest, src, n)
 #  endif
 
-#  if _STRING_ARCH_unaligned
-#   ifndef _FORCE_INLINES
-#    define __mempcpy_args(src) \
+#  if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES
+#   if _STRING_ARCH_unaligned
+#    ifndef _FORCE_INLINES
+#     define __mempcpy_args(src) \
      ((__const char *) (src))[0], ((__const char *) (src))[2],		      \
      ((__const char *) (src))[4], ((__const char *) (src))[6],		      \
      __extension__ __STRING2_SMALL_GET16 (src, 0),			      \
      __extension__ __STRING2_SMALL_GET16 (src, 4),			      \
      __extension__ __STRING2_SMALL_GET32 (src, 0),			      \
      __extension__ __STRING2_SMALL_GET32 (src, 4)
-#   endif
+#    endif
 __STRING_INLINE void *__mempcpy_small (void *, char, char, char, char,
 				       __uint16_t, __uint16_t, __uint32_t,
 				       __uint32_t, size_t);
@@ -283,9 +292,9 @@ __mempcpy_small (void *__dest1,
     }
   return (void *) __u;
 }
-#  else
-#   ifndef _FORCE_INLINES
-#    define __mempcpy_args(src) \
+#   else
+#    ifndef _FORCE_INLINES
+#     define __mempcpy_args(src) \
      ((__const char *) (src))[0],					      \
      __extension__ ((__STRING2_COPY_ARR2)				      \
       { { ((__const char *) (src))[0], ((__const char *) (src))[1] } }),      \
@@ -313,7 +322,7 @@ __mempcpy_small (void *__dest1,
 	  ((__const char *) (src))[2], ((__const char *) (src))[3],	      \
 	  ((__const char *) (src))[4], ((__const char *) (src))[5],	      \
 	  ((__const char *) (src))[6], ((__const char *) (src))[7] } })
-#   endif
+#    endif
 __STRING_INLINE void *__mempcpy_small (void *, char, __STRING2_COPY_ARR2,
 				       __STRING2_COPY_ARR3,
 				       __STRING2_COPY_ARR4,
@@ -367,6 +376,7 @@ __mempcpy_small (void *__dest, char __sr
     }
   return __extension__ ((void *) __u + __srclen);
 }
+#   endif
 #  endif
 # endif
 #endif
@@ -383,8 +393,9 @@ extern void *__rawmemchr (const void *__
 
 
 /* Copy SRC to DEST.  */
-#if !defined _HAVE_STRING_ARCH_strcpy || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strcpy
+#if (!defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)) \
+    || defined _FORCE_INLINES
+# if !defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)
 #  define strcpy(dest, src) \
   (__extension__ (__builtin_constant_p (src)				      \
 		  ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8	      \
@@ -547,26 +558,38 @@ __strcpy_small (char *__dest,
 #ifdef __USE_GNU
 # if !defined _HAVE_STRING_ARCH_stpcpy || defined _FORCE_INLINES
 #  ifndef _HAVE_STRING_ARCH_stpcpy
-#   define __stpcpy(dest, src) \
+#   if __GNUC_PREREQ (3, 0)
+#    define __stpcpy(dest, src) \
+  (__extension__ (__builtin_constant_p (src)				      \
+		  ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8	      \
+		     ? __builtin_strcpy (dest, src) + strlen (src)	      \
+		     : ((char *) (__mempcpy) (dest, src, strlen (src) + 1)    \
+			- 1))						      \
+		  : __stpcpy (dest, src)))
+#   else
+#    define __stpcpy(dest, src) \
   (__extension__ (__builtin_constant_p (src)				      \
 		  ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8	      \
 		     ? __stpcpy_small (dest, __stpcpy_args (src),	      \
 				       strlen (src) + 1)		      \
-		     : ((char *) __mempcpy (dest, src, strlen (src) + 1) - 1))\
+		     : ((char *) (__mempcpy) (dest, src, strlen (src) + 1)    \
+			- 1))						      \
 		  : __stpcpy (dest, src)))
+#   endif
 /* In glibc we use this function frequently but for namespace reasons
    we have to use the name `__stpcpy'.  */
 #   define stpcpy(dest, src) __stpcpy (dest, src)
 #  endif
 
-#  if _STRING_ARCH_unaligned
-#   ifndef _FORCE_INLINES
-#    define __stpcpy_args(src) \
+#  if !__GNUC_PREREQ (3, 0) || _FORCE_INLINES
+#   if _STRING_ARCH_unaligned
+#    ifndef _FORCE_INLINES
+#     define __stpcpy_args(src) \
      __extension__ __STRING2_SMALL_GET16 (src, 0),			      \
      __extension__ __STRING2_SMALL_GET16 (src, 4),			      \
      __extension__ __STRING2_SMALL_GET32 (src, 0),			      \
      __extension__ __STRING2_SMALL_GET32 (src, 4)
-#   endif
+#    endif
 __STRING_INLINE char *__stpcpy_small (char *, __uint16_t, __uint16_t,
 				      __uint32_t, __uint32_t, size_t);
 __STRING_INLINE char *
@@ -626,9 +649,9 @@ __stpcpy_small (char *__dest,
     }
   return &__u->__c;
 }
-#  else
-#   ifndef _FORCE_INLINES
-#    define __stpcpy_args(src) \
+#   else
+#    ifndef _FORCE_INLINES
+#     define __stpcpy_args(src) \
      __extension__ ((__STRING2_COPY_ARR2)				      \
       { { ((__const char *) (src))[0], '\0' } }),			      \
      __extension__ ((__STRING2_COPY_ARR3)				      \
@@ -655,7 +678,7 @@ __stpcpy_small (char *__dest,
 	  ((__const char *) (src))[2], ((__const char *) (src))[3],	      \
 	  ((__const char *) (src))[4], ((__const char *) (src))[5],	      \
 	  ((__const char *) (src))[6], '\0' } })
-#   endif
+#    endif
 __STRING_INLINE char *__stpcpy_small (char *, __STRING2_COPY_ARR2,
 				      __STRING2_COPY_ARR3,
 				      __STRING2_COPY_ARR4,
@@ -709,6 +732,7 @@ __stpcpy_small (char *__dest,
   }
   return __dest + __srclen - 1;
 }
+#   endif
 #  endif
 # endif
 #endif

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]