This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.



GNU C Library master sources branch master updated. glibc-2.22-14-g05a910f


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  05a910f7b420c2b831f35ba90e61c80f001c0606 (commit)
       via  f29ac72effae859140bb0d7fffdb1e6cef0ffed0 (commit)
       via  f6482cf29d3094ca9688be59802353014c528959 (commit)
       via  7b1c56e4834aa3b139fea39ded64a7e901be89a2 (commit)
       via  3136eb7abd3e45a8622c0272181816c1a92e1f65 (commit)
      from  782723d6d8c7b599c4fdbbd359a7bc57b25044ad (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=05a910f7b420c2b831f35ba90e61c80f001c0606

commit 05a910f7b420c2b831f35ba90e61c80f001c0606
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Aug 5 15:58:15 2015 +0100

    Improve performance of mempcpy by inlining and using memcpy. Enable
    this for all targets except sparc, which has an optimized mempcpy
    implementation.

diff --git a/ChangeLog b/ChangeLog
index 65592c3..4a45eed 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* string/string.h: (mempcpy): Redirect to __mempcpy_inline.
+	(__mempcpy): Likewise.  (__mempcpy_inline): New inline function.
+	* sysdeps/sparc/bits/string.h: (_HAVE_STRING_ARCH_mempcpy): Define.
+
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* string/memccpy.c (memccpy):
 	Improve performance by using memchr/memcpy/__mempcpy.
 
diff --git a/string/string.h b/string/string.h
index 54a4d39..3ab7103 100644
--- a/string/string.h
+++ b/string/string.h
@@ -636,6 +636,25 @@ extern char *basename (const char *__filename) __THROW __nonnull ((1));
 # endif
 #endif
 
+#if defined __USE_GNU && defined __OPTIMIZE__ \
+    && defined __extern_always_inline && __GNUC_PREREQ (3,2)
+# if !defined _FORCE_INLINES && !defined _HAVE_STRING_ARCH_mempcpy
+
+#undef mempcpy
+#undef __mempcpy
+#define mempcpy(dest, src, n) __mempcpy_inline (dest, src, n)
+#define __mempcpy(dest, src, n) __mempcpy_inline (dest, src, n)
+
+__extern_always_inline void *
+__mempcpy_inline (void *__restrict __dest,
+		  const void *__restrict __src, size_t __n)
+{
+  return (char *) memcpy (__dest, __src, __n) + __n;
+}
+
+# endif
+#endif
+
 __END_DECLS
 
 #endif /* string.h  */
diff --git a/sysdeps/sparc/bits/string.h b/sysdeps/sparc/bits/string.h
index 36fbb4c..4eb9447 100644
--- a/sysdeps/sparc/bits/string.h
+++ b/sysdeps/sparc/bits/string.h
@@ -26,3 +26,6 @@
 /* sparc32 and sparc64 strchr(x, '\0') perform better than
    __rawmemchr(x, '\0').  */
 #define _HAVE_STRING_ARCH_strchr 1
+
+/* Don't inline mempcpy into memcpy as sparc has an optimized mempcpy.  */
+#define _HAVE_STRING_ARCH_mempcpy 1

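For illustration only (not part of the commit): with the redirect above, a
mempcpy call in optimized GNU C code reduces to memcpy plus a pointer
offset, which is convenient when appending several buffers back to back.
A minimal sketch, with invented buffer names and sizes:

    #define _GNU_SOURCE        /* mempcpy is a GNU extension */
    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      char buf[32];
      char *p = buf;

      /* Each call is effectively memcpy (p, src, n) + n once the
         __mempcpy_inline redirect applies, so chained copies need
         no separate length bookkeeping.  */
      p = mempcpy (p, "foo", 3);
      p = mempcpy (p, "bar", 3);
      *p = '\0';

      printf ("%s\n", buf);    /* prints "foobar" */
      return 0;
    }
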
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f29ac72effae859140bb0d7fffdb1e6cef0ffed0

commit f29ac72effae859140bb0d7fffdb1e6cef0ffed0
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Aug 5 15:24:06 2015 +0100

    Improve memccpy performance by using memchr/memcpy/mempcpy rather than
    a byte loop. Overall performance on bench-memccpy is > 2x faster when
    using the C implementation of memchr and an optimized memcpy.

diff --git a/ChangeLog b/ChangeLog
index 5f40e4e..65592c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* string/memccpy.c (memccpy):
+	Improve performance by using memchr/memcpy/__mempcpy.
+
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* string/strncpy.c (strncpy):
 	Improve performance by using __strnlen/memcpy.
 
diff --git a/string/memccpy.c b/string/memccpy.c
index d9ed697..0987c84 100644
--- a/string/memccpy.c
+++ b/string/memccpy.c
@@ -26,15 +26,12 @@
 void *
 __memccpy (void *dest, const void *src, int c, size_t n)
 {
-  const char *s = src;
-  char *d = dest;
-  const char x = c;
-  size_t i = n;
+  void *p = memchr (src, c, n);
 
-  while (i-- > 0)
-    if ((*d++ = *s++) == x)
-      return d;
+  if (p != NULL)
+    return __mempcpy (dest, src, p - src + 1);
 
+  memcpy (dest, src, n);
   return NULL;
 }
 

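For context only (not part of the commit): memccpy copies at most n bytes
from src to dest, stops after the first occurrence of c, and returns a
pointer just past the copied c in dest (or NULL if c was not found within
n bytes), which is exactly the split the new memchr/__mempcpy/memcpy code
implements. A minimal usage sketch, with invented names and values:

    #define _DEFAULT_SOURCE    /* memccpy is POSIX, not ISO C */
    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      char dst[16];
      const char src[] = "key=value";

      /* Copy up to and including the first '='; ret points past it.  */
      void *ret = memccpy (dst, src, '=', sizeof dst);
      if (ret != NULL)
        {
          *(char *) ret = '\0';
          printf ("%s\n", dst);   /* prints "key=" */
        }
      return 0;
    }
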
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f6482cf29d3094ca9688be59802353014c528959

commit f6482cf29d3094ca9688be59802353014c528959
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Aug 5 15:15:28 2015 +0100

    This patch improves strncpy performance by using strnlen/memcpy rather
    than a byte loop. Performance on bench-strncpy is 1.9-2.1x faster on
    average. I tried several variations, and using a tailcall and calling
    memset conditionally gave the best overall results.

diff --git a/ChangeLog b/ChangeLog
index d013561..5f40e4e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* string/strncpy.c (strncpy):
+	Improve performance by using __strnlen/memcpy.
+
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept):
 	Optimize to avoid an unnecessary FPCR read.
 
diff --git a/string/strncpy.c b/string/strncpy.c
index 37af5aa..d464bbb 100644
--- a/string/strncpy.c
+++ b/string/strncpy.c
@@ -16,68 +16,19 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
-#include <memcopy.h>
 
 #undef strncpy
 
 #ifndef STRNCPY
-#define STRNCPY strncpy
+ #define STRNCPY strncpy
 #endif
 
 char *
 STRNCPY (char *s1, const char *s2, size_t n)
 {
-  char c;
-  char *s = s1;
-
-  --s1;
-
-  if (n >= 4)
-    {
-      size_t n4 = n >> 2;
-
-      for (;;)
-	{
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    break;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    break;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    break;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    break;
-	  if (--n4 == 0)
-	    goto last_chars;
-	}
-      s1++;
-      n = n - (s1 - s);
-      memset (s1, '\0', n);
-      return s;
-    }
-
- last_chars:
-  n &= 3;
-  if (n == 0)
-    return s;
-
-  do
-    {
-      c = *s2++;
-      *++s1 = c;
-      if (--n == 0)
-	return s;
-    }
-  while (c != '\0');
-
-  memset (s1 + 1, '\0', n);
-  return s;
+  size_t size = __strnlen (s2, n);
+  if (size != n)
+    memset (s1 + size, '\0', n - size);
+  return memcpy (s1, s2, size);
 }
 libc_hidden_builtin_def (strncpy)

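A side note, not part of the commit: the rewrite keeps strncpy's required
semantics, namely copy at most n bytes and, when the source is shorter
than n, pad the rest of the destination with NUL bytes (the memset branch
above). A minimal sketch with invented names:

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      char dst[8];

      /* Source shorter than n: the tail of dst is NUL-padded,
         matching the memset path in the new implementation.  */
      strncpy (dst, "ab", sizeof dst);
      printf ("%zu %d\n", strlen (dst), dst[7]);   /* prints "2 0" */

      /* Source at least n bytes long: size == n, so no padding and
         no terminating NUL is written.  */
      strncpy (dst, "abcdefghij", sizeof dst);
      return 0;
    }
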
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7b1c56e4834aa3b139fea39ded64a7e901be89a2

commit 7b1c56e4834aa3b139fea39ded64a7e901be89a2
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Aug 5 15:03:08 2015 +0100

    Improve feenableexcept performance: avoid an unnecessary FPCR read when
    the FPCR does not change. Also improve the logic of the return value.

diff --git a/ChangeLog b/ChangeLog
index ecf9d7b..d013561 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept):
+	Optimize to avoid an unnecessary FPCR read.
+
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
 	Optimize to reduce FPCR/FPSR accesses.
 
diff --git a/sysdeps/aarch64/fpu/feenablxcpt.c b/sysdeps/aarch64/fpu/feenablxcpt.c
index 82ed0b6..a0f736c 100644
--- a/sysdeps/aarch64/fpu/feenablxcpt.c
+++ b/sysdeps/aarch64/fpu/feenablxcpt.c
@@ -24,24 +24,22 @@ feenableexcept (int excepts)
 {
   fpu_control_t fpcr;
   fpu_control_t fpcr_new;
+  fpu_control_t updated_fpcr;
 
   _FPU_GETCW (fpcr);
   excepts &= FE_ALL_EXCEPT;
   fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT);
 
   if (fpcr != fpcr_new)
-    _FPU_SETCW (fpcr_new);
-
-  /* Trapping exceptions are optional in AArch64 the relevant enable
-     bits in FPCR are RES0 hence the absence of support can be
-     detected by reading back the FPCR and comparing with the required
-     value.  */
-  if (excepts)
     {
-      fpu_control_t updated_fpcr;
+      _FPU_SETCW (fpcr_new);
 
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+	 bits in FPCR are RES0 hence the absence of support can be detected
+	 by reading back the FPCR and comparing with the required value.  */
       _FPU_GETCW (updated_fpcr);
-      if (((updated_fpcr >> FE_EXCEPT_SHIFT) & excepts) != excepts)
+
+      if (fpcr_new & ~updated_fpcr)
 	return -1;
     }
 

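As a reader's aside (not from the commit): feenableexcept is the GNU
extension implemented here, and the -1 return is how callers detect that
the hardware ignored the requested trap-enable bits, which AArch64 is
allowed to do since those FPCR bits are RES0. A minimal usage sketch:

    #define _GNU_SOURCE        /* feenableexcept is a GNU extension */
    #include <fenv.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Request traps for divide-by-zero and invalid operations.
         A return of -1 means the request could not be honoured,
         e.g. because the core does not implement FP traps.  */
      if (feenableexcept (FE_DIVBYZERO | FE_INVALID) == -1)
        fprintf (stderr, "FP exception traps not supported\n");
      return 0;
    }
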
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3136eb7abd3e45a8622c0272181816c1a92e1f65

commit 3136eb7abd3e45a8622c0272181816c1a92e1f65
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Aug 5 14:57:37 2015 +0100

    Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
    It uses the same logic as the ARM version. The common case removes 1 FPSR
    and 1 FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV an FPCR read is avoided
    when the FPCR does not change.

diff --git a/ChangeLog b/ChangeLog
index adeba92..ecf9d7b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
+	Optimize to reduce FPCR/FPSR accesses.
+
 2015-08-05  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* locale/loadarchive.c (_nl_archive_subfreeres): Also check
diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
index f47115f..bd56187 100644
--- a/sysdeps/aarch64/fpu/fesetenv.c
+++ b/sysdeps/aarch64/fpu/fesetenv.c
@@ -29,8 +29,20 @@ __fesetenv (const fenv_t *envp)
   fpu_fpsr_t fpsr_new;
 
   _FPU_GETCW (fpcr);
-  _FPU_GETFPSR (fpsr);
 
+  if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+    {
+      /* The new FPCR/FPSR are valid, so don't merge the reserved flags.  */
+      fpcr_new = envp->__fpcr;
+
+      if (fpcr != fpcr_new)
+	_FPU_SETCW (fpcr_new);
+
+      _FPU_SETFPSR (envp->__fpsr);
+      return 0;
+    }
+
+  _FPU_GETFPSR (fpsr);
   fpcr_new = fpcr & _FPU_RESERVED;
   fpsr_new = fpsr & _FPU_FPSR_RESERVED;
 
@@ -39,31 +51,25 @@ __fesetenv (const fenv_t *envp)
       fpcr_new |= _FPU_DEFAULT;
       fpsr_new |= _FPU_FPSR_DEFAULT;
     }
-  else if (envp == FE_NOMASK_ENV)
+  else
     {
       fpcr_new |= _FPU_FPCR_IEEE;
       fpsr_new |= _FPU_FPSR_IEEE;
     }
-  else
-    {
-      fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
-      fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
-    }
 
-  if (fpsr != fpsr_new)
-    _FPU_SETFPSR (fpsr_new);
+  _FPU_SETFPSR (fpsr_new);
 
   if (fpcr != fpcr_new)
-    _FPU_SETCW (fpcr_new);
+    {
+      _FPU_SETCW (fpcr_new);
 
-  /* Trapping exceptions are optional in AArch64 the relevant enable
-     bits in FPCR are RES0 hence the absence of support can be
-     detected by reading back the FPCR and comparing with the required
-     value.  */
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+	 bits in FPCR are RES0 hence the absence of support can be detected
+	 by reading back the FPCR and comparing with the required value.  */
+      _FPU_GETCW (updated_fpcr);
 
-  _FPU_GETCW (updated_fpcr);
-  if ((updated_fpcr & fpcr_new) != fpcr_new)
-    return 1;
+      return fpcr_new & ~updated_fpcr;
+    }
 
   return 0;
 }

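For context only (not part of the commit): the two paths above correspond
to the usual uses of fesetenv, restoring an environment previously saved
with fegetenv versus resetting to FE_DFL_ENV (or the GNU FE_NOMASK_ENV).
A minimal sketch of a caller exercising both paths:

    #include <fenv.h>

    int
    main (void)
    {
      fenv_t saved;

      /* Save the current environment, then change some state.  */
      fegetenv (&saved);
      fesetround (FE_UPWARD);

      /* Restoring a saved environment takes the new fast path:
         the FPSR is written directly and the FPCR only if it
         actually changed.  */
      fesetenv (&saved);

      /* Resetting to the default environment takes the other path.  */
      fesetenv (FE_DFL_ENV);
      return 0;
    }
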
-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                         |   26 ++++++++++++++++
 string/memccpy.c                  |   11 ++----
 string/string.h                   |   19 ++++++++++++
 string/strncpy.c                  |   59 +++---------------------------------
 sysdeps/aarch64/fpu/feenablxcpt.c |   16 ++++-----
 sysdeps/aarch64/fpu/fesetenv.c    |   40 ++++++++++++++----------
 sysdeps/sparc/bits/string.h       |    3 ++
 7 files changed, 87 insertions(+), 87 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

