This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH] powerpc64: strcpy optimization for unaligned string

From: Ondřej Bílka <neleai at seznam dot cz>
To: Rajalakshmi Srinivasaraghavan <raji at linux dot vnet dot ibm dot com>
Cc: libc-alpha at sourceware dot org
Date: Thu, 18 Dec 2014 22:13:48 +0100
Subject: Re: [PATCH] powerpc64: strcpy optimization for unaligned string
Authentication-results: sourceware.org; auth=none
References: <1418832071-93495-1-git-send-email-raji at linux dot vnet dot ibm dot com> <5491A9A5 dot 2000400 at linux dot vnet dot ibm dot com>

On Wed, Dec 17, 2014 at 09:34:53PM +0530, Rajalakshmi Srinivasaraghavan wrote:
> 
> 
> This patch optimizes strcpy for ppc64 for unaligned source or
> destination address. The source or destination address is aligned
> to doubleword and data is shifted based on the alignment and
> added with the previous loaded data to be written as a doubleword.
> For each load, cmpb instruction is used for faster null check.
> 
> More combinations of unaligned inputs are also added to the benchtest
> to measure the improvement. The new optimization shows a 2 to 80%
> performance improvement for longer strings, though it does not show
> a big difference for string sizes less than 16 due to additional checks.
> 
> This patch is tested on powerpc64 BE and LE and I have also attached
> the benchtest result.
> 
As I wrote, the benchtests are suspect. First, retest what happens if you
do not always call strcpy with the same input and output buffers. What
difference does that make in the benchmark?


diff --git a/benchtests/bench-strcpy.c b/benchtests/bench-strcpy.c
index c3ab4cf..0329f60 100644
--- a/benchtests/bench-strcpy.c
+++ b/benchtests/bench-strcpy.c
@@ -71,25 +71,25 @@ SIMPLE_STRCPY (CHAR *dst, const CHAR *src)
 typedef CHAR *(*proto_t) (CHAR *, const CHAR *);
 
 static void
-do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
+do_one_test (impl_t *impl, CHAR **dst, CHAR **src,
 	     size_t len __attribute__((unused)))
 {
   size_t i, iters = INNER_LOOP_ITERS;
   timing_t start, stop, cur;
 
-  if (CALL (impl, dst, src) != STRCPY_RESULT (dst, len))
+  if (CALL (impl, dst[0], src[0]) != STRCPY_RESULT (dst[0], len[0]))
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
-	     CALL (impl, dst, src), STRCPY_RESULT (dst, len));
+	     CALL (impl, dst[0], src[0]), STRCPY_RESULT (dst[0], len));
       ret = 1;
       return;
     }
 
-  if (STRCMP (dst, src) != 0)
+  if (STRCMP (dst[0], src[0]) != 0)
     {
       error (0, 0,
 	     "Wrong result in function %s dst \"%" sfmt "\" src \"%" sfmt "\"",
-	     impl->name, dst, src);
+	     impl->name, dst[0], src[0]);
       ret = 1;
       return;
     }
@@ -97,7 +97,7 @@ do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
   TIMING_NOW (start);
   for (i = 0; i < iters; ++i)
     {
-	  CALL (impl, dst, src);
+	  CALL (impl, dst[i % 16], src[i % 16]);
     }
   TIMING_NOW (stop);
 
@@ -109,8 +109,8 @@ do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
 static void
 do_test (size_t align1, size_t align2, size_t len, int max_char)
 {
-  size_t i;
-  CHAR *s1, *s2;
+  size_t i, j;
+  CHAR **s1, **s2;
 /* For wcscpy: align1 and align2 here mean alignment not in bytes,
    but in wchar_ts, in bytes it will equal to align * (sizeof (wchar_t))
    len for wcschr here isn't in bytes but it's number of wchar_t symbols.  */
@@ -122,12 +122,17 @@ do_test (size_t align1, size_t align2, size_t len, int max_char)
   if ((align2 + len) * sizeof(CHAR) >= page_size)
     return;
 
-  s1 = (CHAR *) (buf1) + align1;
-  s2 = (CHAR *) (buf2) + align2;
+  s1 = calloc (sizeof (char *), 16);
+  s2 = calloc (sizeof (char *), 16);
+  for (j = 0; j < 16; j++)
+    {
+      s1[j] = ((CHAR *) calloc (align1 + len + 1, sizeof (CHAR))) + align1;
+      s2[j] = ((CHAR *) calloc (align2 + len + 1, sizeof (CHAR))) + align2;
 
-  for (i = 0; i < len; i++)
-    s1[i] = 32 + 23 * i % (max_char - 32);
-  s1[len] = 0;
+      for (i = 0; i < len; i++)
+        s1[j][i] = 32 + 23 * i % (max_char - 32);
+      s1[j][len] = 0;
+    }
 
   printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));

Follow-Ups:
- Re: [PATCH] powerpc64: strcpy optimization for unaligned string
  - From: Rajalakshmi Srinivasaraghavan

References:
- [PATCH] powerpc64: strcpy optimization for unaligned string
  - From: Rajalakshmi Srinivasaraghavan

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]