
Re: [RFC] Clean up SSE variable shifts


----- "Richard Henderson" <rth@twiddle.net> wrote:
> And for good measure, here's a third patch which implements a
> variable shift via pshufb.

As a fourth variant, try this patch with the following change.  Given the reduced memory footprint (one cache line instead of four) and the improved unaligned SSE load performance of modern CPUs, it might be even faster.


diff -u b/sysdeps/x86_64/multiarch/varshift.S b/sysdeps/x86_64/multiarch/varshift.S
--- b/sysdeps/x86_64/multiarch/varshift.S
+++ b/sysdeps/x86_64/multiarch/varshift.S
@@ -26,20 +26,5 @@
-	.size	___m128i_shift_right, 256
+	.size	___m128i_shift_right, 31
 
-	.balign 16
 ___m128i_shift_right:
 	.byte	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
-	.byte	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1
-	.byte	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1
-	.byte	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1
-	.byte	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1
-	.byte	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1
-	.byte	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1
-	.byte	  7,  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1
-	.byte	  8,  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	  9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-	.byte	 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+	.byte	 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
diff -u b/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
--- b/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -20,8 +20,8 @@
 
-extern __m128i ___m128i_shift_right[16] __attribute__((visibility("hidden")));
+extern char ___m128i_shift_right[31] __attribute__((visibility("hidden")));
 
 static __inline__ __m128i
 __m128i_shift_right (__m128i value, unsigned long offset)
 {
-  return _mm_shuffle_epi8 (value, ___m128i_shift_right[offset]);
+  return _mm_shuffle_epi8 (value, _mm_loadu_si128 ((__m128i *) (___m128i_shift_right + offset)));
 }
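
For reference, here is a standalone sketch (not part of the patch) of why 31 bytes are enough: loading 16 bytes at ___m128i_shift_right + N yields the mask N, N+1, ..., 15, -1, ..., -1, which pshufb turns into a right shift of the vector by N bytes.  The names below (shift_right_table, shift_right) and the test harness are mine, purely for illustration; build with -mssse3.

/* Illustration only: a 31-byte table plus an unaligned load replaces
   the 16 precomputed 16-byte shuffle masks.  */
#include <stdio.h>
#include <tmmintrin.h>          /* SSSE3: _mm_shuffle_epi8 */

static const char shift_right_table[31] = {
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

static __m128i
shift_right (__m128i value, unsigned long offset)
{
  /* Unaligned 16-byte load starting at the requested offset gives
     the mask offset, offset+1, ..., 15, -1, ..., -1.  */
  __m128i mask
    = _mm_loadu_si128 ((const __m128i *) (shift_right_table + offset));
  return _mm_shuffle_epi8 (value, mask);
}

int
main (void)
{
  unsigned char in[16], out[16];
  for (int i = 0; i < 16; ++i)
    in[i] = (unsigned char) (i + 1);

  __m128i v = _mm_loadu_si128 ((const __m128i *) in);
  for (unsigned long n = 0; n < 16; ++n)
    {
      _mm_storeu_si128 ((__m128i *) out, shift_right (v, n));
      /* Bytes n..15 of the input land in positions 0..15-n; the high
	 n bytes are zeroed, since pshufb zeroes any lane whose mask
	 byte has the top bit set (-1).  */
      printf ("shift %2lu: first byte %d\n", n, out[0]);
    }
  return 0;
}

The maximum offset is 15, so the largest load reads bytes 15..30 of the table and never runs past its 31-byte end.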


-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖

