This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-83-g42e08a5


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  42e08a5438ddbd9d550d914733c0bc5ba96d79ec (commit)
       via  fe36dd025ea34c5c082b688592618ec72369b96b (commit)
      from  76e6d6bca72c611653e70d67c2168d2e91a371eb (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=42e08a5438ddbd9d550d914733c0bc5ba96d79ec

commit 42e08a5438ddbd9d550d914733c0bc5ba96d79ec
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Fri Jul 30 00:14:04 2010 -0700

    Implement optimized strcaecmp for x86-64.

diff --git a/ChangeLog b/ChangeLog
index f19b63b..7b8c416 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2010-07-30  Ulrich Drepper  <drepper@redhat.com>
 
+	* string/Makefile (strop-tests): Add strcasecmp.
+	* sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+	strcasecmp_l-nonascii.
+	(gen-as-const-headers): Add locale-defines.sym.
+	* sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
+	* sysdeps/x86_64/strcasecmp.S: New file.
+	* sysdeps/x86_64/strcasecmp_l.S: New file.
+	* sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
+	* sysdeps/x86_64/locale-defines.sym: New file.
+	* string/test-strcasecmp.c: New file.
+
 	* string/test-strcasestr.c: Test both ends of the range of characters.
 	* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
 
diff --git a/NEWS b/NEWS
index 8358f62..8d9bb43 100644
--- a/NEWS
+++ b/NEWS
@@ -13,7 +13,7 @@ Version 2.13
 
 * POWER7 optimizations: memset, memcmp, strncmp
 
-* New optimized string functions for x86-64: strnlen
+* New optimized string functions for x86-64: strnlen, strcasecmp
   Implemented by Ulrich Drepper.
 
 Version 2.12
diff --git a/string/Makefile b/string/Makefile
index e8eb514..4c160e9 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -49,7 +49,7 @@ o-objects.ob	:= memcpy.o memset.o memchr.o
 strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
 		   strlen strncmp strncpy strpbrk strrchr strspn memmem	\
-		   strstr strcasestr strnlen
+		   strstr strcasestr strnlen strcasecmp
 tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   tst-strlen stratcliff tst-svc tst-inlcall		\
 		   bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap	\
diff --git a/string/test-strcasecmp.c b/string/test-strcasecmp.c
new file mode 100644
index 0000000..7d1d110
--- /dev/null
+++ b/string/test-strcasecmp.c
@@ -0,0 +1,276 @@
+/* Test and measure strcasecmp functions.
+   Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Jakub Jelinek <jakub@redhat.com>, 1999.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <ctype.h>
+#define TEST_MAIN
+#include "test-string.h"
+
+typedef int (*proto_t) (const char *, const char *);
+static int simple_strcasecmp (const char *, const char *);
+static int stupid_strcasecmp (const char *, const char *);
+
+IMPL (stupid_strcasecmp, 0)
+IMPL (simple_strcasecmp, 0)
+IMPL (strcasecmp, 1)
+
+static int
+simple_strcasecmp (const char *s1, const char *s2)
+{
+  int ret;
+
+  while ((ret = ((unsigned char) tolower (*s1)
+		 - (unsigned char) tolower (*s2))) == 0
+	 && *s1++)
+    ++s2;
+  return ret;
+}
+
+static int
+stupid_strcasecmp (const char *s1, const char *s2)
+{
+  size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
+  size_t n = ns1 < ns2 ? ns1 : ns2;
+  int ret = 0;
+
+  while (n--)
+    {
+      if ((ret = ((unsigned char) tolower (*s1)
+		  - (unsigned char) tolower (*s2))) != 0)
+	break;
+      ++s1;
+      ++s2;
+    }
+  return ret;
+}
+
+static void
+do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+{
+  int result = CALL (impl, s1, s2);
+  if ((exp_result == 0 && result != 0)
+      || (exp_result < 0 && result >= 0)
+      || (exp_result > 0 && result <= 0))
+    {
+      error (0, 0, "Wrong result in function %s %d %d", impl->name,
+	     result, exp_result);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t i;
+
+      for (i = 0; i < 32; ++i)
+	{
+	  HP_TIMING_NOW (start);
+	  CALL (impl, s1, s2);
+	  HP_TIMING_NOW (stop);
+	  HP_TIMING_BEST (best_time, start, stop);
+	}
+
+      printf ("\t%zd", (size_t) best_time);
+    }
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len, int max_char,
+	 int exp_result)
+{
+  size_t i;
+  char *s1, *s2;
+
+  if (len == 0)
+    return;
+
+  align1 &= 7;
+  if (align1 + len + 1 >= page_size)
+    return;
+
+  align2 &= 7;
+  if (align2 + len + 1 >= page_size)
+    return;
+
+  s1 = (char *) (buf1 + align1);
+  s2 = (char *) (buf2 + align2);
+
+  for (i = 0; i < len; i++)
+    {
+      s1[i] = toupper (1 + 23 * i % max_char);
+      s2[i] = tolower (s1[i]);
+    }
+
+  s1[len] = s2[len] = 0;
+  s1[len + 1] = 23;
+  s2[len + 1] = 24 + exp_result;
+  if ((s2[len - 1] == 'z' && exp_result == -1)
+      || (s2[len - 1] == 'a' && exp_result == 1))
+    s1[len - 1] += exp_result;
+  else
+    s2[len - 1] -= exp_result;
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s1, s2, exp_result);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align1, align2, pos, len1, len2;
+  int result;
+  long r;
+  unsigned char *p1 = buf1 + page_size - 512;
+  unsigned char *p2 = buf2 + page_size - 512;
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      align1 = random () & 31;
+      if (random () & 1)
+	align2 = random () & 31;
+      else
+	align2 = align1 + (random () & 24);
+      pos = random () & 511;
+      j = align1 > align2 ? align1 : align2;
+      if (pos + j >= 511)
+	pos = 510 - j - (random () & 7);
+      len1 = random () & 511;
+      if (pos >= len1 && (random () & 1))
+	len1 = pos + (random () & 7);
+      if (len1 + j >= 512)
+	len1 = 511 - j - (random () & 7);
+      if (pos >= len1)
+	len2 = len1;
+      else
+	len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+      j = (pos > len2 ? pos : len2) + align1 + 64;
+      if (j > 512)
+	j = 512;
+      for (i = 0; i < j; ++i)
+	{
+	  p1[i] = tolower (random () & 255);
+	  if (i < len1 + align1 && !p1[i])
+	    {
+	      p1[i] = tolower (random () & 255);
+	      if (!p1[i])
+		p1[i] = tolower (1 + (random () & 127));
+	    }
+	}
+      for (i = 0; i < j; ++i)
+	{
+	  p2[i] = toupper (random () & 255);
+	  if (i < len2 + align2 && !p2[i])
+	    {
+	      p2[i] = toupper (random () & 255);
+	      if (!p2[i])
+		toupper (p2[i] = 1 + (random () & 127));
+	    }
+	}
+
+      result = 0;
+      memcpy (p2 + align2, p1 + align1, pos);
+      if (pos < len1)
+	{
+	  if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+	    {
+	      p2[align2 + pos] = toupper (random () & 255);
+	      if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+		p2[align2 + pos] = toupper (p1[align1 + pos]
+					    + 3 + (random () & 127));
+	    }
+
+	  if (p1[align1 + pos] < tolower (p2[align2 + pos]))
+	    result = -1;
+	  else
+	    result = 1;
+	}
+      p1[len1 + align1] = 0;
+      p2[len2 + align2] = 0;
+
+      FOR_EACH_IMPL (impl, 1)
+	{
+	  r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
+	  /* Test whether on 64-bit architectures where ABI requires
+	     callee to promote has the promotion been done.  */
+	  asm ("" : "=g" (r) : "0" (r));
+	  if ((r == 0 && result)
+	      || (r < 0 && result >= 0)
+	      || (r > 0 && result <= 0))
+	    {
+	      error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+		     n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
+	      ret = 1;
+	    }
+	}
+    }
+}
+
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  for (i = 1; i < 16; ++i)
+    {
+      do_test (i, i, i, 127, 0);
+      do_test (i, i, i, 127, 1);
+      do_test (i, i, i, 127, -1);
+    }
+
+  for (i = 1; i < 10; ++i)
+    {
+      do_test (0, 0, 2 << i, 127, 0);
+      do_test (0, 0, 2 << i, 254, 0);
+      do_test (0, 0, 2 << i, 127, 1);
+      do_test (0, 0, 2 << i, 254, 1);
+      do_test (0, 0, 2 << i, 127, -1);
+      do_test (0, 0, 2 << i, 254, -1);
+    }
+
+  for (i = 1; i < 8; ++i)
+    {
+      do_test (i, 2 * i, 8 << i, 127, 0);
+      do_test (2 * i, i, 8 << i, 254, 0);
+      do_test (i, 2 * i, 8 << i, 127, 1);
+      do_test (2 * i, i, 8 << i, 254, 1);
+      do_test (i, 2 * i, 8 << i, 127, -1);
+      do_test (2 * i, i, 8 << i, 254, -1);
+    }
+
+  do_random_tests ();
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index e8d0285..f7eeb15 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -12,7 +12,8 @@ sysdep_routines += _mcount
 endif
 
 ifeq ($(subdir),string)
-sysdep_routines += cacheinfo
+sysdep_routines += cacheinfo strcasecmp_l-nonascii
+gen-as-const-headers += locale-defines.sym
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/locale-defines.sym b/sysdeps/x86_64/locale-defines.sym
new file mode 100644
index 0000000..aebff9a
--- /dev/null
+++ b/sysdeps/x86_64/locale-defines.sym
@@ -0,0 +1,11 @@
+#include <locale/localeinfo.h>
+#include <langinfo.h>
+#include <stddef.h>
+
+--
+
+LOCALE_T___LOCALES		offsetof (struct __locale_struct, __locales)
+LC_CTYPE
+_NL_CTYPE_NONASCII_CASE
+LOCALE_DATA_VALUES		offsetof (struct __locale_data, values)
+SIZEOF_VALUES			sizeof (((struct __locale_data *) 0)->values[0])
diff --git a/sysdeps/x86_64/strcasecmp.S b/sysdeps/x86_64/strcasecmp.S
new file mode 100644
index 0000000..fe49e82
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp.S
@@ -0,0 +1 @@
+/* In strcasecmp_l.S.  */
diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c
new file mode 100644
index 0000000..7a0a04f
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c
@@ -0,0 +1,5 @@
+#include <string.h>
+
+#define __strcasecmp_l __strcasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL    1
+#include <string/strcasecmp.c>
diff --git a/sysdeps/x86_64/strcasecmp_l.S b/sysdeps/x86_64/strcasecmp_l.S
new file mode 100644
index 0000000..5456b3a
--- /dev/null
+++ b/sysdeps/x86_64/strcasecmp_l.S
@@ -0,0 +1,6 @@
+#define STRCMP __strcasecmp_l
+#define USE_AS_STRCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+libc_hidden_def (strcasecmp_l)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index ac3fe14..7b2b246 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -51,6 +51,15 @@
 	je	LABEL(strcmp_exitz);			\
 	mov	%r9, %r11
 
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+
+/* No support for strcasecmp outside libc so far since it is not needed.  */
+# ifdef NOT_IN_lib
+#  error "strcasecmp_l not implemented so far"
+# endif
+
+# define UPDATE_STRNCMP_COUNTER
 #else
 # define UPDATE_STRNCMP_COUNTER
 # ifndef STRCMP
@@ -64,6 +73,19 @@
 	.section .text.ssse3,"ax",@progbits
 #endif
 
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp)
+	movq	__libc_tsd_LOCALE@gottpoff(%rip),%rax
+	movq	%fs:(%rax),%rdx
+
+	/* 5-byte NOP.  */
+	.byte	0x0f,0x1f,0x44,0x00,0x00
+END (__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp)
+libc_hidden_def (__strcasecmp)
+	/* FALLTHROUGH to strcasecmp_l.  */
+#endif
+
 ENTRY (BP_SYM (STRCMP))
 #ifdef NOT_IN_libc
 /* Simple version since we can't use SSE registers in ld.so.  */
@@ -84,6 +106,18 @@ L(neq):	movl	$1, %eax
 	ret
 END (BP_SYM (STRCMP))
 #else	/* NOT_IN_libc */
+# ifdef USE_AS_STRCASECMP_L
+	/* We have to fall back on the C implementation for locales
+	   with encodings not matching ASCII for single bytes.  */
+#  if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+	movq	LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
+#  else
+	movq	(%rdx), %rax
+#  endif
+	testl	$0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+	jne	__strcasecmp_l_nonascii
+# endif
+
 /*
  * This implementation uses SSE to compare up to 16 bytes at a time.
  */
@@ -99,6 +133,26 @@ END (BP_SYM (STRCMP))
 /* Use 64bit AND here to avoid long NOP padding.  */
 	and	$0x3f, %rcx		/* rsi alignment in cache line */
 	and	$0x3f, %rax		/* rdi alignment in cache line */
+# ifdef USE_AS_STRCASECMP_L
+	.section .rodata.cst16,"aM",@progbits,16
+	.align 16
+.Lbelowupper:
+	.quad	0x4040404040404040
+	.quad	0x4040404040404040
+.Ltopupper:
+	.quad	0x5b5b5b5b5b5b5b5b
+	.quad	0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+	.quad	0x2020202020202020
+	.quad	0x2020202020202020
+	.previous
+	movdqa	.Lbelowupper(%rip), %xmm5
+# define UCLOW_reg %xmm5
+	movdqa	.Ltopupper(%rip), %xmm6
+# define UCHIGH_reg %xmm6
+	movdqa	.Ltouppermask(%rip), %xmm7
+# define LCQWORD_reg %xmm7
+# endif
 	cmp	$0x30, %ecx
 	ja	LABEL(crosscache)	/* rsi: 16-byte load will cross cache line */
 	cmp	$0x30, %eax
@@ -107,6 +161,26 @@ END (BP_SYM (STRCMP))
 	movlpd	(%rsi), %xmm2
 	movhpd	8(%rdi), %xmm1
 	movhpd	8(%rsi), %xmm2
+# ifdef USE_AS_STRCASECMP_L
+#  define TOLOWER(reg1, reg2) \
+	movdqa	reg1, %xmm8;					\
+	movdqa	UCHIGH_reg, %xmm9;				\
+	movdqa	reg2, %xmm10;					\
+	movdqa	UCHIGH_reg, %xmm11;				\
+	pcmpgtb	UCLOW_reg, %xmm8;				\
+	pcmpgtb	reg1, %xmm9;					\
+	pcmpgtb	UCLOW_reg, %xmm10;				\
+	pcmpgtb	reg2, %xmm11;					\
+	pand	%xmm9, %xmm8;					\
+	pand	%xmm11, %xmm10;					\
+	pand	LCQWORD_reg, %xmm8;				\
+	pand	LCQWORD_reg, %xmm10;				\
+	por	%xmm8, reg1;					\
+	por	%xmm10, reg2
+	TOLOWER (%xmm1, %xmm2)
+# else
+#  define TOLOWER(reg1, reg2)
+# endif
 	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
 	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
 	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
@@ -159,7 +233,13 @@ LABEL(ashr_0):
 	movdqa	(%rsi), %xmm1
 	pxor	%xmm0, %xmm0			/* clear %xmm0 for null char check */
 	pcmpeqb	%xmm1, %xmm0			/* Any null chars? */
+# ifndef USE_AS_STRCASECMP_L
 	pcmpeqb	(%rdi), %xmm1			/* compare 16 bytes for equality */
+# else
+	movdqa	(%rdi), %xmm2
+	TOLOWER (%xmm1, %xmm2)
+	pcmpeqb	%xmm2, %xmm1			/* compare 16 bytes for equality */
+# endif
 	psubb	%xmm0, %xmm1			/* packed sub of comparison results*/
 	pmovmskb %xmm1, %r9d
 	shr	%cl, %edx			/* adjust 0xffff for offset */
@@ -183,6 +263,7 @@ LABEL(ashr_0):
 LABEL(loop_ashr_0):
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -198,6 +279,7 @@ LABEL(loop_ashr_0):
 	add	$16, %rcx
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -214,7 +296,7 @@ LABEL(loop_ashr_0):
 
 /*
  * The following cases will be handled by ashr_1
- * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
+ * rcx(offset of rsi)  rax(offset of rdi)   relative offset	corresponding case
  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
  */
 	.p2align 4
@@ -224,6 +306,7 @@ LABEL(ashr_1):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
 	pslldq	$15, %xmm2		/* shift first string to align with second */
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
 	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
 	pmovmskb %xmm2, %r9d
@@ -263,6 +346,7 @@ LABEL(gobble_ashr_1):
 # else
 	palignr	$1, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -292,6 +376,7 @@ LABEL(gobble_ashr_1):
 # else
 	palignr	$1, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -351,6 +436,7 @@ LABEL(ashr_2):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$14, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -390,6 +476,7 @@ LABEL(gobble_ashr_2):
 # else
 	palignr	$2, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -420,6 +507,7 @@ LABEL(gobble_ashr_2):
 # else
 	palignr	$2, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -472,6 +560,7 @@ LABEL(ashr_3):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$13, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -512,6 +601,7 @@ LABEL(gobble_ashr_3):
 # else
 	palignr	$3, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -542,6 +632,7 @@ LABEL(gobble_ashr_3):
 # else
 	palignr	$3, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -594,6 +685,7 @@ LABEL(ashr_4):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$12, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -634,6 +726,7 @@ LABEL(gobble_ashr_4):
 # else
 	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -664,6 +757,7 @@ LABEL(gobble_ashr_4):
 # else
 	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -716,6 +810,7 @@ LABEL(ashr_5):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$11, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -756,6 +851,7 @@ LABEL(gobble_ashr_5):
 # else
 	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -786,6 +882,7 @@ LABEL(gobble_ashr_5):
 # else
 	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -838,6 +935,7 @@ LABEL(ashr_6):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$10, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -878,6 +976,7 @@ LABEL(gobble_ashr_6):
 # else
 	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -908,6 +1007,7 @@ LABEL(gobble_ashr_6):
 # else
 	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -960,6 +1060,7 @@ LABEL(ashr_7):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$9, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1000,6 +1101,7 @@ LABEL(gobble_ashr_7):
 # else
 	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1030,6 +1132,7 @@ LABEL(gobble_ashr_7):
 # else
 	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1082,6 +1185,7 @@ LABEL(ashr_8):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$8, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1122,6 +1226,7 @@ LABEL(gobble_ashr_8):
 # else
 	palignr	$8, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1152,6 +1257,7 @@ LABEL(gobble_ashr_8):
 # else
 	palignr	$8, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1204,6 +1310,7 @@ LABEL(ashr_9):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$7, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1244,6 +1351,7 @@ LABEL(gobble_ashr_9):
 # else
 	palignr	$9, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1274,6 +1382,7 @@ LABEL(gobble_ashr_9):
 # else
 	palignr	$9, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1326,6 +1435,7 @@ LABEL(ashr_10):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$6, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1366,6 +1476,7 @@ LABEL(gobble_ashr_10):
 # else
 	palignr	$10, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1396,6 +1507,7 @@ LABEL(gobble_ashr_10):
 # else
 	palignr	$10, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1448,6 +1560,7 @@ LABEL(ashr_11):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$5, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1488,6 +1601,7 @@ LABEL(gobble_ashr_11):
 # else
 	palignr	$11, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1518,6 +1632,7 @@ LABEL(gobble_ashr_11):
 # else
 	palignr	$11, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1570,6 +1685,7 @@ LABEL(ashr_12):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$4, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1610,6 +1726,7 @@ LABEL(gobble_ashr_12):
 # else
 	palignr	$12, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1640,6 +1757,7 @@ LABEL(gobble_ashr_12):
 # else
 	palignr	$12, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1692,6 +1810,7 @@ LABEL(ashr_13):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$3, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1732,6 +1851,7 @@ LABEL(gobble_ashr_13):
 # else
 	palignr	$13, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1762,6 +1882,7 @@ LABEL(gobble_ashr_13):
 # else
 	palignr	$13, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1814,6 +1935,7 @@ LABEL(ashr_14):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq  $2, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1854,6 +1976,7 @@ LABEL(gobble_ashr_14):
 # else
 	palignr	$14, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1884,6 +2007,7 @@ LABEL(gobble_ashr_14):
 # else
 	palignr	$14, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1936,6 +2060,7 @@ LABEL(ashr_15):
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$1, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1978,6 +2103,7 @@ LABEL(gobble_ashr_15):
 # else
 	palignr	$15, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -2008,6 +2134,7 @@ LABEL(gobble_ashr_15):
 # else
 	palignr	$15, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -2049,6 +2176,7 @@ LABEL(ashr_15_exittail):
 
 	.p2align 4
 LABEL(aftertail):
+	TOLOWER (%xmm1, %xmm3)
 	pcmpeqb	%xmm3, %xmm1
 	psubb	%xmm0, %xmm1
 	pmovmskb %xmm1, %edx
@@ -2076,6 +2204,12 @@ LABEL(less16bytes):
 	movzbl	(%rsi, %rdx), %ecx
 	movzbl	(%rdi, %rdx), %eax
 
+# ifdef USE_AS_STRCASECMP_L
+	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+	movl	(%rdx,%rcx,4), %ecx
+	movl	(%rdx,%rax,4), %eax
+# endif
+
 	sub	%ecx, %eax
 	ret
 

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fe36dd025ea34c5c082b688592618ec72369b96b

commit fe36dd025ea34c5c082b688592618ec72369b96b
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Fri Jul 30 00:09:07 2010 -0700

    Fix tolower operation in strcasestr.

diff --git a/ChangeLog b/ChangeLog
index cc53cf0..f19b63b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2010-07-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* string/test-strcasestr.c: Test both ends of the range of characters.
+	* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
+
 2010-07-29  Roland McGrath  <roland@redhat.com>
 
 	[BZ #11856]
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index 547537c..edc41f3 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -97,7 +97,7 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
   char *s1 = (char *) (buf1 + align1);
   char *s2 = (char *) (buf2 + align2);
 
-  static const char d[] = "1234567890abcdef";
+  static const char d[] = "1234567890abcxyz";
 #define dl (sizeof (d) - 1)
   char *ss2 = s2;
   for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index e2b19a3..45d7a55 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -174,7 +174,7 @@ __m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc,
   __m128i frag = __m128i_strloadu (p);
 
 #define UCLOW 0x4040404040404040ULL
-#define UCHIGH 0x5a5a5a5a5a5a5a5aULL
+#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
 #define LCQWORD 0x2020202020202020ULL
   /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'.  */
   __m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                              |   16 ++
 NEWS                                   |    2 +-
 string/Makefile                        |    2 +-
 string/test-strcasecmp.c               |  276 ++++++++++++++++++++++++++++++++
 string/test-strcasestr.c               |    2 +-
 sysdeps/x86_64/Makefile                |    3 +-
 sysdeps/x86_64/locale-defines.sym      |   11 ++
 sysdeps/x86_64/multiarch/strstr.c      |    2 +-
 sysdeps/x86_64/strcasecmp.S            |    1 +
 sysdeps/x86_64/strcasecmp_l-nonascii.c |    5 +
 sysdeps/x86_64/strcasecmp_l.S          |    6 +
 sysdeps/x86_64/strcmp.S                |  136 ++++++++++++++++-
 12 files changed, 456 insertions(+), 6 deletions(-)
 create mode 100644 string/test-strcasecmp.c
 create mode 100644 sysdeps/x86_64/locale-defines.sym
 create mode 100644 sysdeps/x86_64/strcasecmp.S
 create mode 100644 sysdeps/x86_64/strcasecmp_l-nonascii.c
 create mode 100644 sysdeps/x86_64/strcasecmp_l.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]