This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
Community source repository for glibc add-on ports branch, master, updated. glibc-2.12-29-g539af0f
- From: rth at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 23 Sep 2010 15:05:24 -0000
- Subject: Community source repository for glibc add-on ports branch, master, updated. glibc-2.12-29-g539af0f
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Community source repository for glibc add-on ports".
The branch, master has been updated
via 539af0feeebbf489fe070c0cb787f9fab77c0d0c (commit)
via 926cf114f7ca2b19116cac005303040648e17e77 (commit)
via 7ffd2bd725c3e4d77e6bfe36b76500d20427929d (commit)
from cf64098fc4aef77a4b708e5d92aedced16964390 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc-ports.git;a=commitdiff;h=539af0feeebbf489fe070c0cb787f9fab77c0d0c
commit 539af0feeebbf489fe070c0cb787f9fab77c0d0c
Merge: cf64098 926cf11
Author: Richard Henderson <rth@twiddle.net>
Date: Thu Sep 23 08:05:17 2010 -0700
Merge branch 'rth/testing'
http://sources.redhat.com/git/gitweb.cgi?p=glibc-ports.git;a=commitdiff;h=926cf114f7ca2b19116cac005303040648e17e77
commit 926cf114f7ca2b19116cac005303040648e17e77
Author: Richard Henderson <rth@twiddle.net>
Date: Wed Sep 15 10:41:43 2010 -0700
alpha: rewrite memchr.
[BZ #12019]
The new implementation does not read too much data.
diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index a12407c..d1c0e12 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,4 +1,11 @@
-2010-05-05 Richard Henderson <rth@redhat.com>
+2010-09-23 Richard Henderson <rth@redhat.com>
+
+ [BZ #12019]
+ * sysdeps/alpha/alphaev6/memchr.S: Remove.
+ * sysdeps/alpha/memchr.S: Remove.
+ * sysdeps/alpha/memchr.c: New.
+
+2010-09-23 Richard Henderson <rth@redhat.com>
[BZ #1864]
* sysdeps/unix/sysv/linux/alpha/fstatfs64.c: New.
diff --git a/sysdeps/alpha/alphaev6/memchr.S b/sysdeps/alpha/alphaev6/memchr.S
deleted file mode 100644
index fe77cd8..0000000
--- a/sysdeps/alpha/alphaev6/memchr.S
+++ /dev/null
@@ -1,193 +0,0 @@
-/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David Mosberger (davidm@cs.arizona.edu).
- EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <sysdep.h>
-
- .arch ev6
- .set noreorder
- .set noat
-
-ENTRY(__memchr)
-#ifdef PROF
- ldgp gp, 0(pv)
- lda AT, _mcount
- jsr AT, (AT), _mcount
- .prologue 1
-#else
- .prologue 0
-#endif
-
- # Hack -- if someone passes in (size_t)-1, hoping to just
- # search til the end of the address space, we will overflow
- # below when we find the address of the last byte. Given
- # that we will never have a 56-bit address space, cropping
- # the length is the easiest way to avoid trouble.
- zap $18, 0x80, $5 # U : Bound length
- beq $18, $not_found # U :
- ldq_u $1, 0($16) # L : load first quadword Latency=3
- and $17, 0xff, $17 # E : L L U U : 00000000000000ch
-
- insbl $17, 1, $2 # U : 000000000000ch00
- cmpult $18, 9, $4 # E : small (< 1 quad) string?
- or $2, $17, $17 # E : 000000000000chch
- lda $3, -1($31) # E : U L L U
-
- sll $17, 16, $2 # U : 00000000chch0000
- addq $16, $5, $5 # E : Max search address
- or $2, $17, $17 # E : 00000000chchchch
- sll $17, 32, $2 # U : U L L U : chchchch00000000
-
- or $2, $17, $17 # E : chchchchchchchch
- extql $1, $16, $7 # U : $7 is upper bits
- beq $4, $first_quad # U :
- ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3
-
- extqh $6, $16, $6 # U : 2 cycle stall for $6
- mov $16, $0 # E :
- nop # E :
- or $7, $6, $1 # E : L U L U $1 = quadword starting at $16
-
- # Deal with the case where at most 8 bytes remain to be searched
- # in $1. E.g.:
- # $18 = 6
- # $1 = ????c6c5c4c3c2c1
-$last_quad:
- negq $18, $6 # E :
- xor $17, $1, $1 # E :
- srl $3, $6, $6 # U : $6 = mask of $18 bits set
- cmpbge $31, $1, $2 # E : L U L U
-
- nop
- nop
- and $2, $6, $2 # E :
- beq $2, $not_found # U : U L U L
-
-$found_it:
-#if defined(__alpha_fix__) && defined(__alpha_cix__)
- /*
- * Since we are guaranteed to have set one of the bits, we don't
- * have to worry about coming back with a 0x40 out of cttz...
- */
- cttz $2, $3 # U0 :
- addq $0, $3, $0 # E : All done
- nop # E :
- ret # L0 : L U L U
-#else
- /*
- * Slow and clunky. It can probably be improved.
- * An exercise left for others.
- */
- negq $2, $3 # E :
- and $2, $3, $2 # E :
- and $2, 0x0f, $1 # E :
- addq $0, 4, $3 # E :
-
- cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
- nop # E : keep with cmov
- and $2, 0x33, $1 # E :
- addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0
-
- cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
- nop # E : keep with cmov
- and $2, 0x55, $1 # E :
- addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0
-
- cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
- nop
- nop
- ret # L0 : L U L U
-#endif
-
- # Deal with the case where $18 > 8 bytes remain to be
- # searched. $16 may not be aligned.
- .align 4
-$first_quad:
- andnot $16, 0x7, $0 # E :
- insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff)
- xor $1, $17, $1 # E :
- or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff
-
- cmpbge $31, $1, $2 # E :
- bne $2, $found_it # U :
- # At least one byte left to process.
- ldq $31, 8($0) # L :
- subq $5, 1, $18 # E : U L U L
-
- addq $0, 8, $0 # E :
- # Make $18 point to last quad to be accessed (the
- # last quad may or may not be partial).
- andnot $18, 0x7, $18 # E :
- cmpult $0, $18, $2 # E :
- beq $2, $final # U : U L U L
-
- # At least two quads remain to be accessed.
-
- subq $18, $0, $4 # E : $4 <- nr quads to be processed
- and $4, 8, $4 # E : odd number of quads?
- bne $4, $odd_quad_count # U :
- # At least three quads remain to be accessed
- nop # E : L U L U : move prefetched value to correct reg
-
- .align 4
-$unrolled_loop:
- ldq $1, 0($0) # L : load quad
- xor $17, $1, $2 # E :
- ldq $31, 8($0) # L : prefetch next quad
- cmpbge $31, $2, $2 # E : U L U L
-
- bne $2, $found_it # U :
- addq $0, 8, $0 # E :
- nop # E :
- nop # E :
-
-$odd_quad_count:
- ldq $1, 0($0) # L : load quad
- xor $17, $1, $2 # E :
- ldq $31, 8($0) # L : prefetch $4
- cmpbge $31, $2, $2 # E :
-
- addq $0, 8, $6 # E :
- bne $2, $found_it # U :
- cmpult $6, $18, $6 # E :
- addq $0, 8, $0 # E :
-
- bne $6, $unrolled_loop # U :
- nop # E :
- nop # E :
- nop # E :
-
-$final: ldq $1, 0($0) # L : load last quad
- subq $5, $0, $18 # E : $18 <- number of bytes left to do
- nop # E :
- bne $18, $last_quad # U :
-
-$not_found:
- mov $31, $0 # E :
- nop # E :
- nop # E :
- ret # L0 :
-
- END(__memchr)
-
-weak_alias (__memchr, memchr)
-#if !__BOUNDED_POINTERS__
-weak_alias (__memchr, __ubp_memchr)
-#endif
-libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/alpha/memchr.S b/sysdeps/alpha/memchr.S
deleted file mode 100644
index 87c7fb1..0000000
--- a/sysdeps/alpha/memchr.S
+++ /dev/null
@@ -1,176 +0,0 @@
-/* Copyright (C) 1996, 2000, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David Mosberger (davidm@cs.arizona.edu).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-/* Finds characters in a memory area. Optimized for the Alpha:
-
- - memory accessed as aligned quadwords only
- - uses cmpbge to compare 8 bytes in parallel
- - does binary search to find 0 byte in last
- quadword (HAKMEM needed 12 instructions to
- do this instead of the 9 instructions that
- binary search needs).
-
-For correctness consider that:
-
- - only minimum number of quadwords may be accessed
- - the third argument is an unsigned long
-*/
-
-#include <sysdep.h>
-
- .set noreorder
- .set noat
-
-ENTRY(__memchr)
-#ifdef PROF
- ldgp gp, 0(pv)
- lda AT, _mcount
- jsr AT, (AT), _mcount
- .prologue 1
-#else
- .prologue 0
-#endif
-
- # Hack -- if someone passes in (size_t)-1, hoping to just
- # search til the end of the address space, we will overflow
- # below when we find the address of the last byte. Given
- # that we will never have a 56-bit address space, cropping
- # the length is the easiest way to avoid trouble.
- zap a2, 0x80, t4 #-e0 :
-
- beq a2, $not_found # .. e1 :
- ldq_u t0, 0(a0) # e1 : load first quadword
- insbl a1, 1, t1 # .. e0 : t1 = 000000000000ch00
- and a1, 0xff, a1 #-e0 : a1 = 00000000000000ch
- cmpult a2, 9, t3 # .. e1 :
- or t1, a1, a1 # e0 : a1 = 000000000000chch
- lda t2, -1(zero) # .. e1 :
- sll a1, 16, t1 #-e0 : t1 = 00000000chch0000
- addq a0, t4, t4 # .. e1 :
- or t1, a1, a1 # e1 : a1 = 00000000chchchch
- unop # :
- sll a1, 32, t1 #-e0 : t1 = chchchch00000000
- or t1, a1, a1 # e1 : a1 = chchchchchchchch
- extql t0, a0, t6 # e0 :
- beq t3, $first_quad # .. e1 :
-
- ldq_u t5, -1(t4) #-e1 : eight or less bytes to search
- extqh t5, a0, t5 # .. e0 :
- mov a0, v0 # e0 :
- or t6, t5, t0 # .. e1 : t0 = quadword starting at a0
-
- # Deal with the case where at most 8 bytes remain to be searched
- # in t0. E.g.:
- # a2 = 6
- # t0 = ????c6c5c4c3c2c1
-$last_quad:
- negq a2, t5 #-e0 :
- xor a1, t0, t0 # .. e1 :
- srl t2, t5, t5 # e0 : t5 = mask of a2 bits set
- cmpbge zero, t0, t1 # .. e1 :
- and t1, t5, t1 #-e0 :
- beq t1, $not_found # .. e1 :
-
-$found_it:
- # Now, determine which byte matched:
- negq t1, t2 # e0 :
- and t1, t2, t1 # e1 :
-
- and t1, 0x0f, t0 #-e0 :
- addq v0, 4, t2 # .. e1 :
- cmoveq t0, t2, v0 # e0 :
-
- addq v0, 2, t2 # .. e1 :
- and t1, 0x33, t0 #-e0 :
- cmoveq t0, t2, v0 # .. e1 :
-
- and t1, 0x55, t0 # e0 :
- addq v0, 1, t2 # .. e1 :
- cmoveq t0, t2, v0 #-e0 :
-
-$done: ret # .. e1 :
-
- # Deal with the case where a2 > 8 bytes remain to be
- # searched. a0 may not be aligned.
- .align 4
-$first_quad:
- andnot a0, 0x7, v0 #-e1 :
- insqh t2, a0, t1 # .. e0 : t1 = 0000ffffffffffff (a0<0:2> ff)
- xor t0, a1, t0 # e0 :
- or t0, t1, t0 # e1 : t0 = ====ffffffffffff
- cmpbge zero, t0, t1 #-e0 :
- bne t1, $found_it # .. e1 :
-
- # At least one byte left to process.
-
- ldq zero, 8(v0) # e0 : prefetch next quad
- subq t4, 1, a2 # .. e1 :
- addq v0, 8, v0 #-e0 :
-
- # Make a2 point to last quad to be accessed (the
- # last quad may or may not be partial).
-
- andnot a2, 0x7, a2 # .. e1 :
- cmpult v0, a2, t1 # e0 :
- beq t1, $final # .. e1 :
-
- # At least two quads remain to be accessed.
-
- subq a2, v0, t3 #-e0 : t3 <- nr quads to be processed
- and t3, 8, t3 # e1 : odd number of quads?
- bne t3, $odd_quad_count # e1 :
-
- # At least three quads remain to be accessed
-
- .align 4
-$unrolled_loop:
- ldq t0, 0(v0) # e0 : load quad
- xor a1, t0, t1 # .. e1 :
- ldq zero, 8(v0) # e0 : prefetch next quad
- cmpbge zero, t1, t1 # .. e1:
- bne t1, $found_it # e0 :
-
- addq v0, 8, v0 # e1 :
-$odd_quad_count:
- ldq t0, 0(v0) # e0 : load quad
- xor a1, t0, t1 # .. e1 :
- ldq zero, 8(v0) # e0 : prefetch next quad
- cmpbge zero, t1, t1 # .. e1 :
- addq v0, 8, t5 #-e0 :
- bne t1, $found_it # .. e1 :
-
- cmpult t5, a2, t5 # e0 :
- addq v0, 8, v0 # .. e1 :
- bne t5, $unrolled_loop #-e1 :
-
-$final: ldq t0, 0(v0) # e0 : load last quad
- subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
- bne a2, $last_quad # e1 :
-
-$not_found:
- mov zero, v0 #-e0 :
- ret # .. e1 :
-
- END(__memchr)
-
-weak_alias (__memchr, memchr)
-#if !__BOUNDED_POINTERS__
-weak_alias (__memchr, __ubp_memchr)
-#endif
-libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/alpha/memchr.c b/sysdeps/alpha/memchr.c
new file mode 100644
index 0000000..c52841b
--- /dev/null
+++ b/sysdeps/alpha/memchr.c
@@ -0,0 +1,175 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <string.h>
+
+typedef unsigned long word;
+
+static inline word
+ldq_u(const void *s)
+{
+ return *(const word *)((word)s & -8);
+}
+
+#define unlikely(X) __builtin_expect ((X), 0)
+#define prefetch(X) __builtin_prefetch ((void *)(X), 0)
+
+#define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X))
+#define find(X, Y) cmpbeq0 ((X) ^ (Y))
+
+/* Search no more than N bytes of S for C. */
+
+void *
+__memchr (const void *s, int xc, size_t n)
+{
+ const word *s_align;
+ word t, current, found, mask, offset;
+
+ if (unlikely (n == 0))
+ return 0;
+
+ current = ldq_u (s);
+
+ /* Replicate low byte of XC into all bytes of C. */
+ t = xc & 0xff; /* 0000000c */
+ t = (t << 8) | t; /* 000000cc */
+ t = (t << 16) | t; /* 0000cccc */
+ const word c = (t << 32) | t; /* cccccccc */
+
+ /* Align the source, and decrement the count by the number
+ of bytes searched in the first word. */
+ s_align = (const word *)(s & -8);
+ n += (s & 7);
+
+ /* Deal with misalignment in the first word for the comparison. */
+ mask = (1ul << (s & 7)) - 1;
+
+ /* If the entire string fits within one word, we may need masking
+ at both the front and the back of the string. */
+ if (unlikely (n <= 8))
+ {
+ mask |= -1ul << n;
+ goto last_quad;
+ }
+
+ found = find (current, c) & ~mask;
+ if (unlikely (found))
+ goto found_it;
+
+ s_align++;
+ n -= 8;
+
+ /* If the block is sufficiently large, align to cacheline and prefetch. */
+ if (unlikely (n >= 256))
+ {
+ /* Prefetch 3 cache lines beyond the one we're working on. */
+ prefetch (s_align + 8);
+ prefetch (s_align + 16);
+ prefetch (s_align + 24);
+
+ while ((word)s_align & 63)
+ {
+ current = *s_align;
+ found = find (current, c);
+ if (found)
+ goto found_it;
+ s_align++;
+ n -= 8;
+ }
+
+ /* Within each cacheline, advance the load for the next word
+ before the test for the previous word is complete. This
+ allows us to hide the 3 cycle L1 cache load latency. We
+ only perform this advance load within a cacheline to prevent
+ reading across page boundary. */
+#define CACHELINE_LOOP \
+ do { \
+ word i, next = s_align[0]; \
+ for (i = 0; i < 7; ++i) \
+ { \
+ current = next; \
+ next = s_align[1]; \
+ found = find (current, c); \
+ if (unlikely (found)) \
+ goto found_it; \
+ s_align++; \
+ } \
+ current = next; \
+ found = find (current, c); \
+ if (unlikely (found)) \
+ goto found_it; \
+ s_align++; \
+ n -= 64; \
+ } while (0)
+
+ /* While there's still lots more data to potentially be read,
+ continue issuing prefetches for the 4th cacheline out. */
+ while (n >= 256)
+ {
+ prefetch (s_align + 24);
+ CACHELINE_LOOP;
+ }
+
+ /* Up to 3 cache lines remaining. Continue issuing advanced
+ loads, but stop prefetching. */
+ while (n >= 64)
+ CACHELINE_LOOP;
+
+ /* We may have exhausted the buffer. */
+ if (n == 0)
+ return NULL;
+ }
+
+ /* Quadword aligned loop. */
+ current = *s_align;
+ while (n > 8)
+ {
+ found = find (current, c);
+ if (unlikely (found))
+ goto found_it;
+ current = *++s_align;
+ n -= 8;
+ }
+
+ /* The last word may need masking at the tail of the compare. */
+ mask = -1ul << n;
+ last_quad:
+ found = find (current, c) & ~mask;
+ if (found == 0)
+ return NULL;
+
+ found_it:
+#ifdef __alpha_cix__
+ offset = __builtin_alpha_cttz (found);
+#else
+ /* Extract LSB. */
+ found &= -found;
+
+ /* Binary search for the LSB. */
+ offset = (found & 0x0f ? 0 : 4);
+ offset += (found & 0x33 ? 0 : 2);
+ offset += (found & 0x55 ? 0 : 1);
+#endif
+
+ return (void *)((word)s_align + offset);
+}
+
+#ifdef weak_alias
+weak_alias (__memchr, BP_SYM (memchr))
+#endif
+libc_hidden_builtin_def (memchr)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog.alpha | 20 ++
sysdeps/alpha/alphaev6/memchr.S | 193 --------------------
sysdeps/alpha/memchr.S | 176 ------------------
sysdeps/alpha/memchr.c | 175 ++++++++++++++++++
sysdeps/unix/sysv/linux/alpha/fstatfs64.c | 1 +
sysdeps/unix/sysv/linux/alpha/fstatvfs.c | 1 +
sysdeps/unix/sysv/linux/alpha/fstatvfs64.c | 1 +
sysdeps/unix/sysv/linux/alpha/internal_statvfs64.c | 1 +
sysdeps/unix/sysv/linux/alpha/statfs64.c | 1 +
sysdeps/unix/sysv/linux/alpha/statvfs.c | 1 +
sysdeps/unix/sysv/linux/alpha/statvfs64.c | 1 +
sysdeps/unix/sysv/linux/alpha/syscalls.list | 5 +
12 files changed, 207 insertions(+), 369 deletions(-)
delete mode 100644 sysdeps/alpha/alphaev6/memchr.S
delete mode 100644 sysdeps/alpha/memchr.S
create mode 100644 sysdeps/alpha/memchr.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/fstatfs64.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/fstatvfs.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/fstatvfs64.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/internal_statvfs64.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/statfs64.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/statvfs.c
create mode 100644 sysdeps/unix/sysv/linux/alpha/statvfs64.c
hooks/post-receive
--
Community source repository for glibc add-on ports