This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.12-83-g42e08a5
- From: drepper at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 30 Jul 2010 07:14:29 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.12-83-g42e08a5
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 42e08a5438ddbd9d550d914733c0bc5ba96d79ec (commit)
via fe36dd025ea34c5c082b688592618ec72369b96b (commit)
from 76e6d6bca72c611653e70d67c2168d2e91a371eb (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=42e08a5438ddbd9d550d914733c0bc5ba96d79ec
commit 42e08a5438ddbd9d550d914733c0bc5ba96d79ec
Author: Ulrich Drepper <drepper@redhat.com>
Date: Fri Jul 30 00:14:04 2010 -0700
Implement optimized strcasecmp for x86-64.
diff --git a/ChangeLog b/ChangeLog
index f19b63b..7b8c416 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
2010-07-30 Ulrich Drepper <drepper@redhat.com>
+ * string/Makefile (strop-tests): Add strcasecmp.
+ * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+ strcasecmp_l-nonascii.
+ (gen-as-const-headers): Add locale-defines.sym.
+ * sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
+ * sysdeps/x86_64/strcasecmp.S: New file.
+ * sysdeps/x86_64/strcasecmp_l.S: New file.
+ * sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
+ * sysdeps/x86_64/locale-defines.sym: New file.
+ * string/test-strcasecmp.c: New file.
+
* string/test-strcasestr.c: Test both ends of the range of characters.
* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
diff --git a/NEWS b/NEWS
index 8358f62..8d9bb43 100644
--- a/NEWS
+++ b/NEWS
@@ -13,7 +13,7 @@ Version 2.13
* POWER7 optimizations: memset, memcmp, strncmp
-* New optimized string functions for x86-64: strnlen
+* New optimized string functions for x86-64: strnlen, strcasecmp
Implemented by Ulrich Drepper.
Version 2.12
diff --git a/string/Makefile b/string/Makefile
index e8eb514..4c160e9 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -49,7 +49,7 @@ o-objects.ob := memcpy.o memset.o memchr.o
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
strlen strncmp strncpy strpbrk strrchr strspn memmem \
- strstr strcasestr strnlen
+ strstr strcasestr strnlen strcasecmp
tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strlen stratcliff tst-svc tst-inlcall \
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
diff --git a/string/test-strcasecmp.c b/string/test-strcasecmp.c
new file mode 100644
index 0000000..7d1d110
--- /dev/null
+++ b/string/test-strcasecmp.c
@@ -0,0 +1,276 @@
+/* Test and measure strcasecmp functions.
+ Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Jakub Jelinek <jakub@redhat.com>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <ctype.h>
+#define TEST_MAIN
+#include "test-string.h"
+
+/* Signature shared by every strcasecmp implementation exercised below. */
+typedef int (*proto_t) (const char *, const char *);
+/* Reference implementations defined later in this file. */
+static int simple_strcasecmp (const char *, const char *);
+static int stupid_strcasecmp (const char *, const char *);
+
+/* Register the implementations to compare.  NOTE(review): the meaning of
+   the second IMPL argument comes from test-string.h, which is not visible
+   here; presumably it marks the library implementation under test. */
+IMPL (stupid_strcasecmp, 0)
+IMPL (simple_strcasecmp, 0)
+IMPL (strcasecmp, 1)
+
+/* Reference strcasecmp: walk both strings in lock step and return the
+   difference of the first pair of case-folded bytes that differ, or 0
+   when the terminator of S1 is reached without finding a difference. */
+static int
+simple_strcasecmp (const char *s1, const char *s2)
+{
+  for (;; ++s1, ++s2)
+    {
+      int diff = ((unsigned char) tolower (*s1)
+                  - (unsigned char) tolower (*s2));
+
+      /* Stop on the first mismatch or once S1 is exhausted. */
+      if (diff != 0 || *s1 == '\0')
+        return diff;
+    }
+}
+
+/* Deliberately naive strcasecmp: measure both strings first, then compare
+   the case-folded bytes over the shorter length (terminator included).
+   Returns the first non-zero byte difference, or 0 if none is found. */
+static int
+stupid_strcasecmp (const char *s1, const char *s2)
+{
+  size_t len1 = strlen (s1) + 1;
+  size_t len2 = strlen (s2) + 1;
+  size_t limit = len1 < len2 ? len1 : len2;
+  size_t idx;
+
+  for (idx = 0; idx < limit; ++idx)
+    {
+      int diff = ((unsigned char) tolower (s1[idx])
+                  - (unsigned char) tolower (s2[idx]));
+
+      if (diff != 0)
+        return diff;
+    }
+  return 0;
+}
+
+/* Run IMPL on (S1, S2) and check that the sign of its result matches the
+   sign of EXP_RESULT.  On mismatch report an error and set the global
+   failure flag.  When high-precision timing is available, additionally
+   time 32 calls and print the best time. */
+static void
+do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+{
+  int got = CALL (impl, s1, s2);
+  int sign_ok;
+
+  /* Only the sign of the result is specified, so compare signs. */
+  if (exp_result == 0)
+    sign_ok = got == 0;
+  else if (exp_result < 0)
+    sign_ok = got < 0;
+  else
+    sign_ok = got > 0;
+
+  if (!sign_ok)
+    {
+      error (0, 0, "Wrong result in function %s %d %d", impl->name,
+             got, exp_result);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t round = 0;
+
+      while (round < 32)
+        {
+          HP_TIMING_NOW (start);
+          CALL (impl, s1, s2);
+          HP_TIMING_NOW (stop);
+          HP_TIMING_BEST (best_time, start, stop);
+          ++round;
+        }
+
+      printf ("\t%zd", (size_t) best_time);
+    }
+}
+
+/* Build two LEN-byte strings at offsets ALIGN1 and ALIGN2 (each masked
+   to 0..7) in buf1 and buf2 and run every registered implementation on
+   them.  The first string holds upper-cased bytes derived from the index
+   and MAX_CHAR, the second its lower-cased copy; the last byte of one
+   string is then shifted by EXP_RESULT so that the expected comparison
+   sign is EXP_RESULT.  Does nothing when LEN is 0 or the strings would
+   not fit within page_size. */
+static void
+do_test (size_t align1, size_t align2, size_t len, int max_char,
+         int exp_result)
+{
+  char *str1, *str2;
+  size_t idx, last;
+
+  align1 &= 7;
+  align2 &= 7;
+  if (len == 0
+      || align1 + len + 1 >= page_size
+      || align2 + len + 1 >= page_size)
+    return;
+
+  str1 = (char *) (buf1 + align1);
+  str2 = (char *) (buf2 + align2);
+
+  for (idx = 0; idx < len; idx++)
+    {
+      str1[idx] = toupper (1 + 23 * idx % max_char);
+      str2[idx] = tolower (str1[idx]);
+    }
+
+  /* Terminate both strings, then place one extra byte after each
+     terminator. */
+  str1[len] = 0;
+  str2[len] = 0;
+  str1[len + 1] = 23;
+  str2[len + 1] = 24 + exp_result;
+
+  /* Adjust the final character to produce the requested ordering; at the
+     'z'/'a' ends of the alphabet the first string is changed instead of
+     the second. */
+  last = len - 1;
+  if ((s2_is_edge: 0, (str2[last] == 'z' && exp_result == -1))
+      || (str2[last] == 'a' && exp_result == 1))
+    str1[last] += exp_result;
+  else
+    str2[last] -= exp_result;
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, str1, str2, exp_result);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+/* Randomised test.  For ITERATIONS rounds, pick random alignments, string
+   lengths and a position POS of the first difference, fill the last 512
+   bytes of buf1 with lower-cased and of buf2 with upper-cased random
+   data, make the two strings agree up to POS, and verify that every
+   implementation returns a result whose sign matches the expected one.
+   Failures are reported through error () and the global failure flag. */
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align1, align2, pos, len1, len2;
+  int result;
+  long r;
+  unsigned char *p1 = buf1 + page_size - 512;
+  unsigned char *p2 = buf2 + page_size - 512;
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      /* Choose alignments, the mismatch position and both lengths so
+         that everything stays inside the 512-byte window. */
+      align1 = random () & 31;
+      if (random () & 1)
+        align2 = random () & 31;
+      else
+        align2 = align1 + (random () & 24);
+      pos = random () & 511;
+      j = align1 > align2 ? align1 : align2;
+      if (pos + j >= 511)
+        pos = 510 - j - (random () & 7);
+      len1 = random () & 511;
+      if (pos >= len1 && (random () & 1))
+        len1 = pos + (random () & 7);
+      if (len1 + j >= 512)
+        len1 = 511 - j - (random () & 7);
+      if (pos >= len1)
+        len2 = len1;
+      else
+        len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+      j = (pos > len2 ? pos : len2) + align1 + 64;
+      if (j > 512)
+        j = 512;
+
+      /* Fill the first buffer with lower-cased random bytes, avoiding
+         NUL inside the string proper. */
+      for (i = 0; i < j; ++i)
+        {
+          p1[i] = tolower (random () & 255);
+          if (i < len1 + align1 && !p1[i])
+            {
+              p1[i] = tolower (random () & 255);
+              if (!p1[i])
+                p1[i] = tolower (1 + (random () & 127));
+            }
+        }
+
+      /* Same for the second buffer, but upper-cased. */
+      for (i = 0; i < j; ++i)
+        {
+          p2[i] = toupper (random () & 255);
+          if (i < len2 + align2 && !p2[i])
+            {
+              p2[i] = toupper (random () & 255);
+              if (!p2[i])
+                /* Store the converted byte; the original discarded the
+                   result of toupper and kept the raw value. */
+                p2[i] = toupper (1 + (random () & 127));
+            }
+        }
+
+      /* Make the strings identical up to POS, then force a difference
+         at POS when it lies inside the first string. */
+      result = 0;
+      memcpy (p2 + align2, p1 + align1, pos);
+      if (pos < len1)
+        {
+          if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+            {
+              p2[align2 + pos] = toupper (random () & 255);
+              if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+                p2[align2 + pos] = toupper (p1[align1 + pos]
+                                            + 3 + (random () & 127));
+            }
+
+          if (p1[align1 + pos] < tolower (p2[align2 + pos]))
+            result = -1;
+          else
+            result = 1;
+        }
+      p1[len1 + align1] = 0;
+      p2[len2 + align2] = 0;
+
+      FOR_EACH_IMPL (impl, 1)
+        {
+          r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
+          /* Test whether on 64-bit architectures where ABI requires
+             callee to promote has the promotion been done. */
+          asm ("" : "=g" (r) : "0" (r));
+          if ((r == 0 && result)
+              || (r < 0 && result >= 0)
+              || (r > 0 && result <= 0))
+            {
+              error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+                     n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
+              ret = 1;
+            }
+        }
+    }
+}
+
+/* Test driver: print the header row of implementation names, run the
+   fixed-pattern tests for a range of alignments and lengths with each
+   expected outcome (equal, greater, less), then run the randomised
+   tests.  Returns the global failure flag. */
+int
+test_main (void)
+{
+  static const int expected[] = { 0, 1, -1 };
+  const size_t nexpected = sizeof expected / sizeof expected[0];
+  size_t i, k;
+
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  /* Short strings with matching alignment and length. */
+  for (i = 1; i < 16; ++i)
+    for (k = 0; k < nexpected; ++k)
+      do_test (i, i, i, 127, expected[k]);
+
+  /* Aligned strings of growing power-of-two length. */
+  for (i = 1; i < 10; ++i)
+    for (k = 0; k < nexpected; ++k)
+      {
+        do_test (0, 0, 2 << i, 127, expected[k]);
+        do_test (0, 0, 2 << i, 254, expected[k]);
+      }
+
+  /* Differently aligned strings of growing length. */
+  for (i = 1; i < 8; ++i)
+    for (k = 0; k < nexpected; ++k)
+      {
+        do_test (i, 2 * i, 8 << i, 127, expected[k]);
+        do_test (2 * i, i, 8 << i, 254, expected[k]);
+      }
+
+  do_random_tests ();
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index e8d0285..f7eeb15 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -12,7 +12,8 @@ sysdep_routines += _mcount
endif
ifeq ($(subdir),string)
-sysdep_routines += cacheinfo
+sysdep_routines += cacheinfo strcasecmp_l-nonascii
+gen-as-const-headers += locale-defines.sym
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/locale-defines.sym b/sysdeps/x86_64/locale-defines.sym
new file mode 100644
index 0000000..aebff9a
--- /dev/null
+++ b/sysdeps/x86_64/locale-defines.sym
@@ -0,0 +1,11 @@
+#include <locale/localeinfo.h>
+#include <langinfo.h>
+#include <stddef.h>
+
+--
+
+LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
+LC_CTYPE
+_NL_CTYPE_NONASCII_CASE
+LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
+SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
diff --git a/sysdeps/x86_64/strcasecmp.S b/sysdeps/x86_64/strcasecmp.S
new file mode 100644
index 0000000..fe49e82
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp.S
@@ -0,0 +1 @@
+/* In strcasecmp_l.S. */
diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c
new file mode 100644
index 0000000..7a0a04f
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c
@@ -0,0 +1,5 @@
+/* Compile the generic C implementation from string/strcasecmp.c under
+   the name __strcasecmp_l_nonascii.  The assembly in strcmp.S jumps to
+   this symbol for locales it does not handle itself. */
+#include <string.h>
+
+#define __strcasecmp_l __strcasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL 1
+#include <string/strcasecmp.c>
diff --git a/sysdeps/x86_64/strcasecmp_l.S b/sysdeps/x86_64/strcasecmp_l.S
new file mode 100644
index 0000000..5456b3a
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp_l.S
@@ -0,0 +1,6 @@
+/* Instantiate strcmp.S as __strcasecmp_l: STRCMP names the entry point
+   and USE_AS_STRCASECMP_L enables the case-folding code paths.  The
+   same include also emits the __strcasecmp entry stub. */
+#define STRCMP __strcasecmp_l
+#define USE_AS_STRCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+libc_hidden_def (strcasecmp_l)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index ac3fe14..7b2b246 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -51,6 +51,15 @@
je LABEL(strcmp_exitz); \
mov %r9, %r11
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+
+/* No support for strcasecmp outside libc so far since it is not needed.
+   The guard must test NOT_IN_libc, the macro used by the rest of this
+   file; the misspelled NOT_IN_lib is never defined, so the #error could
+   not fire. */
+# ifdef NOT_IN_libc
+# error "strcasecmp_l not implemented so far"
+# endif
+
+# define UPDATE_STRNCMP_COUNTER
#else
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
@@ -64,6 +73,19 @@
.section .text.ssse3,"ax",@progbits
#endif
+#ifdef USE_AS_STRCASECMP_L
+/* Entry point for strcasecmp without an explicit locale: load the value
+   of the thread-local __libc_tsd_LOCALE into %rdx and fall through into
+   the strcasecmp_l code that follows, which reads the locale from
+   %rdx. */
+ENTRY (__strcasecmp)
+ movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
+ movq %fs:(%rax),%rdx
+
+ /* 5-byte NOP. */
+ .byte 0x0f,0x1f,0x44,0x00,0x00
+END (__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp)
+libc_hidden_def (__strcasecmp)
+ /* FALLTHROUGH to strcasecmp_l. */
+#endif
+
ENTRY (BP_SYM (STRCMP))
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
@@ -84,6 +106,18 @@ L(neq): movl $1, %eax
ret
END (BP_SYM (STRCMP))
#else /* NOT_IN_libc */
+# ifdef USE_AS_STRCASECMP_L
+ /* We have to fall back on the C implementation for locales
+ with encodings not matching ASCII for single bytes. */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
+# else
+ movq (%rdx), %rax
+# endif
+ /* Test bit 0 of the locale's _NL_CTYPE_NONASCII_CASE value.  The mask
+ must be non-zero: TEST with $0 always yields zero, so the jump
+ below would never be taken. */
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ jne __strcasecmp_l_nonascii
+# endif
+
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
@@ -99,6 +133,26 @@ END (BP_SYM (STRCMP))
/* Use 64bit AND here to avoid long NOP padding. */
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
+# ifdef USE_AS_STRCASECMP_L
+ /* 16-byte constants used by TOLOWER, emitted into .rodata.cst16 and
+ then loaded into %xmm5-%xmm7:
+ .Lbelowupper  - 0x40 in every byte, one below 'A';
+ .Ltopupper    - 0x5b in every byte, one above 'Z';
+ .Ltouppermask - 0x20 in every byte, the bit TOLOWER ORs into
+ upper-case letters to make them lower case. */
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+.Lbelowupper:
+ .quad 0x4040404040404040
+ .quad 0x4040404040404040
+.Ltopupper:
+ .quad 0x5b5b5b5b5b5b5b5b
+ .quad 0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+ movdqa .Lbelowupper(%rip), %xmm5
+# define UCLOW_reg %xmm5
+ movdqa .Ltopupper(%rip), %xmm6
+# define UCHIGH_reg %xmm6
+ movdqa .Ltouppermask(%rip), %xmm7
+# define LCQWORD_reg %xmm7
+# endif
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
@@ -107,6 +161,26 @@ END (BP_SYM (STRCMP))
movlpd (%rsi), %xmm2
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
+# ifdef USE_AS_STRCASECMP_L
+/* TOLOWER (reg1, reg2): convert ASCII upper-case letters in both 16-byte
+   registers to lower case in place.  For each byte that is greater than
+   UCLOW_reg and less than UCHIGH_reg (signed byte compares), the
+   corresponding LCQWORD_reg bits are ORed in; other bytes are left
+   unchanged.  Uses %xmm8-%xmm11 as scratch registers.  Without
+   USE_AS_STRCASECMP_L the macro expands to nothing. */
+# define TOLOWER(reg1, reg2) \
+ movdqa reg1, %xmm8; \
+ movdqa UCHIGH_reg, %xmm9; \
+ movdqa reg2, %xmm10; \
+ movdqa UCHIGH_reg, %xmm11; \
+ pcmpgtb UCLOW_reg, %xmm8; \
+ pcmpgtb reg1, %xmm9; \
+ pcmpgtb UCLOW_reg, %xmm10; \
+ pcmpgtb reg2, %xmm11; \
+ pand %xmm9, %xmm8; \
+ pand %xmm11, %xmm10; \
+ pand LCQWORD_reg, %xmm8; \
+ pand LCQWORD_reg, %xmm10; \
+ por %xmm8, reg1; \
+ por %xmm10, reg2
+ TOLOWER (%xmm1, %xmm2)
+# else
+# define TOLOWER(reg1, reg2)
+# endif
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
@@ -159,7 +233,13 @@ LABEL(ashr_0):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+# ifndef USE_AS_STRCASECMP_L
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
+# else
+ movdqa (%rdi), %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
+# endif
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
@@ -183,6 +263,7 @@ LABEL(ashr_0):
LABEL(loop_ashr_0):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -198,6 +279,7 @@ LABEL(loop_ashr_0):
add $16, %rcx
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -214,7 +296,7 @@ LABEL(loop_ashr_0):
/*
* The following cases will be handled by ashr_1
- * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
@@ -224,6 +306,7 @@ LABEL(ashr_1):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pslldq $15, %xmm2 /* shift first string to align with second */
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %r9d
@@ -263,6 +346,7 @@ LABEL(gobble_ashr_1):
# else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -292,6 +376,7 @@ LABEL(gobble_ashr_1):
# else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -351,6 +436,7 @@ LABEL(ashr_2):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $14, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -390,6 +476,7 @@ LABEL(gobble_ashr_2):
# else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -420,6 +507,7 @@ LABEL(gobble_ashr_2):
# else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -472,6 +560,7 @@ LABEL(ashr_3):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $13, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -512,6 +601,7 @@ LABEL(gobble_ashr_3):
# else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -542,6 +632,7 @@ LABEL(gobble_ashr_3):
# else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -594,6 +685,7 @@ LABEL(ashr_4):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $12, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -634,6 +726,7 @@ LABEL(gobble_ashr_4):
# else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -664,6 +757,7 @@ LABEL(gobble_ashr_4):
# else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -716,6 +810,7 @@ LABEL(ashr_5):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $11, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -756,6 +851,7 @@ LABEL(gobble_ashr_5):
# else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -786,6 +882,7 @@ LABEL(gobble_ashr_5):
# else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -838,6 +935,7 @@ LABEL(ashr_6):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $10, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -878,6 +976,7 @@ LABEL(gobble_ashr_6):
# else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -908,6 +1007,7 @@ LABEL(gobble_ashr_6):
# else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -960,6 +1060,7 @@ LABEL(ashr_7):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $9, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1000,6 +1101,7 @@ LABEL(gobble_ashr_7):
# else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1030,6 +1132,7 @@ LABEL(gobble_ashr_7):
# else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1082,6 +1185,7 @@ LABEL(ashr_8):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $8, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1122,6 +1226,7 @@ LABEL(gobble_ashr_8):
# else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1152,6 +1257,7 @@ LABEL(gobble_ashr_8):
# else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1204,6 +1310,7 @@ LABEL(ashr_9):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $7, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1244,6 +1351,7 @@ LABEL(gobble_ashr_9):
# else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1274,6 +1382,7 @@ LABEL(gobble_ashr_9):
# else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1326,6 +1435,7 @@ LABEL(ashr_10):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $6, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1366,6 +1476,7 @@ LABEL(gobble_ashr_10):
# else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1396,6 +1507,7 @@ LABEL(gobble_ashr_10):
# else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1448,6 +1560,7 @@ LABEL(ashr_11):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $5, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1488,6 +1601,7 @@ LABEL(gobble_ashr_11):
# else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1518,6 +1632,7 @@ LABEL(gobble_ashr_11):
# else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1570,6 +1685,7 @@ LABEL(ashr_12):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $4, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1610,6 +1726,7 @@ LABEL(gobble_ashr_12):
# else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1640,6 +1757,7 @@ LABEL(gobble_ashr_12):
# else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1692,6 +1810,7 @@ LABEL(ashr_13):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1732,6 +1851,7 @@ LABEL(gobble_ashr_13):
# else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1762,6 +1882,7 @@ LABEL(gobble_ashr_13):
# else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1814,6 +1935,7 @@ LABEL(ashr_14):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $2, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1854,6 +1976,7 @@ LABEL(gobble_ashr_14):
# else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1884,6 +2007,7 @@ LABEL(gobble_ashr_14):
# else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1936,6 +2060,7 @@ LABEL(ashr_15):
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $1, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1978,6 +2103,7 @@ LABEL(gobble_ashr_15):
# else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -2008,6 +2134,7 @@ LABEL(gobble_ashr_15):
# else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -2049,6 +2176,7 @@ LABEL(ashr_15_exittail):
.p2align 4
LABEL(aftertail):
+ TOLOWER (%xmm1, %xmm3)
pcmpeqb %xmm3, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
@@ -2076,6 +2204,12 @@ LABEL(less16bytes):
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
+# ifdef USE_AS_STRCASECMP_L
+ /* Map the two differing bytes through the C locale's tolower table
+ (4-byte entries) before subtracting them.  NOTE(review): the
+ 128*4 bias presumably skips entries for negative indices - confirm
+ against the definition of _nl_C_LC_CTYPE_tolower. */
+ leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+ movl (%rdx,%rcx,4), %ecx
+ movl (%rdx,%rax,4), %eax
+# endif
+
sub %ecx, %eax
ret
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fe36dd025ea34c5c082b688592618ec72369b96b
commit fe36dd025ea34c5c082b688592618ec72369b96b
Author: Ulrich Drepper <drepper@redhat.com>
Date: Fri Jul 30 00:09:07 2010 -0700
Fix tolower operation in strcasestr.
diff --git a/ChangeLog b/ChangeLog
index cc53cf0..f19b63b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2010-07-30 Ulrich Drepper <drepper@redhat.com>
+
+ * string/test-strcasestr.c: Test both ends of the range of characters.
+ * sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
+
2010-07-29 Roland McGrath <roland@redhat.com>
[BZ #11856]
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index 547537c..edc41f3 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -97,7 +97,7 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
char *s1 = (char *) (buf1 + align1);
char *s2 = (char *) (buf2 + align2);
- static const char d[] = "1234567890abcdef";
+ static const char d[] = "1234567890abcxyz";
#define dl (sizeof (d) - 1)
char *ss2 = s2;
for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index e2b19a3..45d7a55 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -174,7 +174,7 @@ __m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc,
__m128i frag = __m128i_strloadu (p);
#define UCLOW 0x4040404040404040ULL
-#define UCHIGH 0x5a5a5a5a5a5a5a5aULL
+#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
#define LCQWORD 0x2020202020202020ULL
/* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */
__m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 16 ++
NEWS | 2 +-
string/Makefile | 2 +-
string/test-strcasecmp.c | 276 ++++++++++++++++++++++++++++++++
string/test-strcasestr.c | 2 +-
sysdeps/x86_64/Makefile | 3 +-
sysdeps/x86_64/locale-defines.sym | 11 ++
sysdeps/x86_64/multiarch/strstr.c | 2 +-
sysdeps/x86_64/strcasecmp.S | 1 +
sysdeps/x86_64/strcasecmp_l-nonascii.c | 5 +
sysdeps/x86_64/strcasecmp_l.S | 6 +
sysdeps/x86_64/strcmp.S | 136 ++++++++++++++++-
12 files changed, 456 insertions(+), 6 deletions(-)
create mode 100644 string/test-strcasecmp.c
create mode 100644 sysdeps/x86_64/locale-defines.sym
create mode 100644 sysdeps/x86_64/strcasecmp.S
create mode 100644 sysdeps/x86_64/strcasecmp_l-nonascii.c
create mode 100644 sysdeps/x86_64/strcasecmp_l.S
hooks/post-receive
--
GNU C Library master sources