This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! I was surprised by the code generated for foo = stpcpy(foo, "./") on ia64, so I ran my stringops benchmark (the one I posted here some months ago) on i686/ia64/alpha/sparc/sparc64. On all !_STRING_ARCH_unaligned platforms (ia64/alpha/sparc*), all of mempcpy/strcpy/stpcpy were much slower with the header "optimizations" enabled, and generated much larger code. Here are, e.g., the ia64 results (legend as before: times are in ticks; gcc is without bits/string*.h opts; nob is likewise, but with -fno-builtin too; str is with -D__USE_STRING_INLINES; str2 is with just the string2.h opts (without any options); * means the time without any bits/string*.h opts is faster than or as fast as the others): gcc nob str str2 mempcpy (0,a,1) 45 45 280 276 * (0,a,2) 48 48 292 290 * (0,ab,3) 51 51 276 276 * (0,abc,4) 54 54 299 299 * (0,abcd,5) 57 57 308 311 * (0,abcde,6) 60 60 324 332 * (0,abcdef,7) 63 63 305 306 * (0,abcdefg,8) 66 66 322 322 * (0,quitelongstring,0) 34 35 155 155 * (0,quitelongstring,1) 50 45 287 283 * (0,quitelongstring,2) 56 48 295 296 * (0,quitelongstring,3) 63 51 302 303 * (0,quitelongstring,4) 70 54 303 299 * (0,quitelongstring,5) 77 57 305 302 * (0,quitelongstring,6) 84 60 307 302 * (0,quitelongstring,7) 91 63 307 310 * (0,quitelongstring,8) 98 66 317 322 * (5,a,1) 48 48 274 277 * (5,a,2) 51 51 304 291 * (5,ab,3) 54 54 275 276 * (5,abc,4) 57 57 303 306 * (5,abcd,5) 60 60 306 315 * (5,abcde,6) 63 63 328 328 * (5,abcdef,7) 66 66 304 304 * (5,abcdefg,8) 69 69 323 320 * (5,quitelongstring,0) 37 37 160 153 * (5,quitelongstring,1) 52 48 285 287 * (5,quitelongstring,2) 59 51 299 298 * (5,quitelongstring,3) 66 54 302 296 * (5,quitelongstring,4) 73 57 303 299 * (5,quitelongstring,5) 80 60 304 306 * (5,quitelongstring,6) 87 63 303 301 * (5,quitelongstring,7) 94 67 313 306 * (5,quitelongstring,8) 101 69 320 320 * stpcpy (0,NUL) 33 34 286 290 * (0,a) 40 40 280 280 * (0,ab) 47 47 301 312 * (0,abc) 54 54 336 338 * (0,abcd) 61 61 315 323 * (0,abcde) 122 72 312 323 * (0,abcdef) 138 75 305 312 * (0,abcdefg) 154 82 325 326 * (5,NUL) 36 36 288 
291 * (5,a) 43 43 277 285 * (5,ab) 50 58 306 317 * (5,abc) 57 57 338 336 * (5,abcd) 64 64 319 323 * (5,abcde) 125 71 302 313 * (5,abcdef) 145 78 309 316 * (5,abcdefg) 157 85 319 320 * strcpy (0,NUL) 4 51 300 290 * (0,a) 14 54 274 292 * (0,ab) 5 66 308 308 * (0,abc) 4 69 330 338 * (0,abcd) 5 72 310 335 * (0,abcde) 5 75 314 318 * (0,abcdef) 7 78 309 313 * (0,abcdefg) 4 81 322 331 * (5,NUL) 6 49 288 285 * (5,a) 7 53 279 277 * (5,ab) 8 59 304 317 * (5,abc) 9 90 330 335 * (5,abcd) 10 93 302 319 * (5,abcde) 11 96 302 305 * (5,abcdef) 12 99 307 315 * (5,abcdefg) 13 111 319 321 * The resulting tester binary was 553221 bytes. With the patch below: mempcpy (0,a,1) 45 45 14 14 (0,a,2) 48 48 4 4 (0,ab,3) 51 51 7 7 (0,abc,4) 54 54 4 4 (0,abcd,5) 57 57 7 7 (0,abcde,6) 60 60 7 7 (0,abcdef,7) 63 63 7 7 (0,abcdefg,8) 66 66 4 4 (0,quitelongstring,0) 35 35 1 1 (0,quitelongstring,1) 45 45 4 4 (0,quitelongstring,2) 49 48 14 14 (0,quitelongstring,3) 51 51 7 7 (0,quitelongstring,4) 54 54 4 4 (0,quitelongstring,5) 57 57 7 7 (0,quitelongstring,6) 60 60 7 7 (0,quitelongstring,7) 63 63 7 7 (0,quitelongstring,8) 66 66 4 4 (5,a,1) 48 48 6 6 (5,a,2) 51 51 7 7 (5,ab,3) 54 54 8 8 (5,abc,4) 57 57 9 9 (5,abcd,5) 60 60 10 10 (5,abcde,6) 63 63 11 11 (5,abcdef,7) 66 66 12 12 (5,abcdefg,8) 69 69 13 13 (5,quitelongstring,0) 37 37 4 4 (5,quitelongstring,1) 48 48 6 6 (5,quitelongstring,2) 51 51 7 7 (5,quitelongstring,3) 54 54 8 8 (5,quitelongstring,4) 57 57 9 9 (5,quitelongstring,5) 60 60 10 10 (5,quitelongstring,6) 63 63 11 11 (5,quitelongstring,7) 66 66 12 12 (5,quitelongstring,8) 69 69 13 13 stpcpy (0,NUL) 33 33 14 14 (0,a) 40 40 4 4 (0,ab) 47 47 5 5 (0,abc) 54 54 4 4 (0,abcd) 61 61 5 5 (0,abcde) 68 68 7 11 (0,abcdef) 75 75 5 5 (0,abcdefg) 82 82 4 4 (5,NUL) 36 36 6 6 (5,a) 43 43 7 7 (5,ab) 50 50 8 8 (5,abc) 57 57 9 9 (5,abcd) 64 64 10 10 (5,abcde) 71 71 11 11 (5,abcdef) 78 78 12 12 (5,abcdefg) 85 85 13 13 strcpy (0,NUL) 4 55 14 14 * (0,a) 14 58 4 4 (0,ab) 5 70 5 5 * (0,abc) 8 80 4 4 (0,abcd) 5 76 5 5 * 
(0,abcde) 5 79 5 5 * (0,abcdef) 7 82 7 7 * (0,abcdefg) 4 85 4 4 * (5,NUL) 6 53 6 6 * (5,a) 7 57 7 7 * (5,ab) 8 63 8 8 * (5,abc) 9 94 9 9 * (5,abcd) 10 97 10 10 * (5,abcde) 11 100 11 11 * (5,abcdef) 12 103 12 12 * (5,abcdefg) 13 115 13 13 * and the resulting tester binary was 325285 bytes long (i.e. ~230KB shorter). The results are similar on alpha and sparc. On i686, the patch usually generates exactly the same code, and sometimes code that is just a bit faster. But the important thing is that the compiler can know more things than the header. Not to mention it speeds up compilation... 2002-01-23 Jakub Jelinek <jakub@redhat.com> * string/bits/string2.h (__mempcpy): For gcc 3.0+, don't use __mempcpy_small but instead use __builtin_memcpy ( , , n) + n for short lengths and constant src. (strcpy): Don't optimize for gcc 3.0+. * (__stpcpy): For gcc 3.0+, don't use __stpcpy_small but instead use __builtin_strcpy (, src) + strlen (src) for short string literal src. --- libc/string/bits/string2.h.jj Wed Nov 21 13:32:37 2001 +++ libc/string/bits/string2.h Wed Jan 23 20:09:05 2002 @@ -1,5 +1,5 @@ /* Machine-independant string function optimizations. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997,1998,1999,2000,2001,2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -198,26 +198,35 @@ __STRING2_COPY_TYPE (8); #ifdef __USE_GNU # if !defined _HAVE_STRING_ARCH_mempcpy || defined _FORCE_INLINES # ifndef _HAVE_STRING_ARCH_mempcpy -# define __mempcpy(dest, src, n) \ +# if __GNUC_PREREQ (3, 0) +# define __mempcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ + && __string2_1bptr_p (src) && n <= 8 \ + ? __builtin_memcpy (dest, src, n) + n \ + : __mempcpy (dest, src, n))) +# else +# define __mempcpy(dest, src, n) \ (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ && __string2_1bptr_p (src) && n <= 8 \ ? 
__mempcpy_small (dest, __mempcpy_args (src), n) \ : __mempcpy (dest, src, n))) +# endif /* In glibc we use this function frequently but for namespace reasons we have to use the name `__mempcpy'. */ # define mempcpy(dest, src, n) __mempcpy (dest, src, n) # endif -# if _STRING_ARCH_unaligned -# ifndef _FORCE_INLINES -# define __mempcpy_args(src) \ +# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES +# if _STRING_ARCH_unaligned +# ifndef _FORCE_INLINES +# define __mempcpy_args(src) \ ((__const char *) (src))[0], ((__const char *) (src))[2], \ ((__const char *) (src))[4], ((__const char *) (src))[6], \ __extension__ __STRING2_SMALL_GET16 (src, 0), \ __extension__ __STRING2_SMALL_GET16 (src, 4), \ __extension__ __STRING2_SMALL_GET32 (src, 0), \ __extension__ __STRING2_SMALL_GET32 (src, 4) -# endif +# endif __STRING_INLINE void *__mempcpy_small (void *, char, char, char, char, __uint16_t, __uint16_t, __uint32_t, __uint32_t, size_t); @@ -283,9 +292,9 @@ __mempcpy_small (void *__dest1, } return (void *) __u; } -# else -# ifndef _FORCE_INLINES -# define __mempcpy_args(src) \ +# else +# ifndef _FORCE_INLINES +# define __mempcpy_args(src) \ ((__const char *) (src))[0], \ __extension__ ((__STRING2_COPY_ARR2) \ { { ((__const char *) (src))[0], ((__const char *) (src))[1] } }), \ @@ -313,7 +322,7 @@ __mempcpy_small (void *__dest1, ((__const char *) (src))[2], ((__const char *) (src))[3], \ ((__const char *) (src))[4], ((__const char *) (src))[5], \ ((__const char *) (src))[6], ((__const char *) (src))[7] } }) -# endif +# endif __STRING_INLINE void *__mempcpy_small (void *, char, __STRING2_COPY_ARR2, __STRING2_COPY_ARR3, __STRING2_COPY_ARR4, @@ -367,6 +376,7 @@ __mempcpy_small (void *__dest, char __sr } return __extension__ ((void *) __u + __srclen); } +# endif # endif # endif #endif @@ -383,8 +393,9 @@ extern void *__rawmemchr (const void *__ /* Copy SRC to DEST. 
*/ -#if !defined _HAVE_STRING_ARCH_strcpy || defined _FORCE_INLINES -# ifndef _HAVE_STRING_ARCH_strcpy +#if (!defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)) \ + || defined _FORCE_INLINES +# if !defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0) # define strcpy(dest, src) \ (__extension__ (__builtin_constant_p (src) \ ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ @@ -547,26 +558,38 @@ __strcpy_small (char *__dest, #ifdef __USE_GNU # if !defined _HAVE_STRING_ARCH_stpcpy || defined _FORCE_INLINES # ifndef _HAVE_STRING_ARCH_stpcpy -# define __stpcpy(dest, src) \ +# if __GNUC_PREREQ (3, 0) +# define __stpcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ + ? __builtin_strcpy (dest, src) + strlen (src) \ + : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ + - 1)) \ + : __stpcpy (dest, src))) +# else +# define __stpcpy(dest, src) \ (__extension__ (__builtin_constant_p (src) \ ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ ? __stpcpy_small (dest, __stpcpy_args (src), \ strlen (src) + 1) \ - : ((char *) __mempcpy (dest, src, strlen (src) + 1) - 1))\ + : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ + - 1)) \ : __stpcpy (dest, src))) +# endif /* In glibc we use this function frequently but for namespace reasons we have to use the name `__stpcpy'. 
*/ # define stpcpy(dest, src) __stpcpy (dest, src) # endif -# if _STRING_ARCH_unaligned -# ifndef _FORCE_INLINES -# define __stpcpy_args(src) \ +# if !__GNUC_PREREQ (3, 0) || _FORCE_INLINES +# if _STRING_ARCH_unaligned +# ifndef _FORCE_INLINES +# define __stpcpy_args(src) \ __extension__ __STRING2_SMALL_GET16 (src, 0), \ __extension__ __STRING2_SMALL_GET16 (src, 4), \ __extension__ __STRING2_SMALL_GET32 (src, 0), \ __extension__ __STRING2_SMALL_GET32 (src, 4) -# endif +# endif __STRING_INLINE char *__stpcpy_small (char *, __uint16_t, __uint16_t, __uint32_t, __uint32_t, size_t); __STRING_INLINE char * @@ -626,9 +649,9 @@ __stpcpy_small (char *__dest, } return &__u->__c; } -# else -# ifndef _FORCE_INLINES -# define __stpcpy_args(src) \ +# else +# ifndef _FORCE_INLINES +# define __stpcpy_args(src) \ __extension__ ((__STRING2_COPY_ARR2) \ { { ((__const char *) (src))[0], '\0' } }), \ __extension__ ((__STRING2_COPY_ARR3) \ @@ -655,7 +678,7 @@ __stpcpy_small (char *__dest, ((__const char *) (src))[2], ((__const char *) (src))[3], \ ((__const char *) (src))[4], ((__const char *) (src))[5], \ ((__const char *) (src))[6], '\0' } }) -# endif +# endif __STRING_INLINE char *__stpcpy_small (char *, __STRING2_COPY_ARR2, __STRING2_COPY_ARR3, __STRING2_COPY_ARR4, @@ -709,6 +732,7 @@ __stpcpy_small (char *__dest, } return __dest + __srclen - 1; } +# endif # endif # endif #endif Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |