[RFC PATCH 3/3] Exploit that strchr needle is mostly ascii
- From: Ondřej Bílka <neleai@seznam.cz>
- To: libc-alpha@sourceware.org
- Date: Tue, 26 May 2015 20:10:35 +0200
- Subject: [RFC PATCH 3/3] Exploit that strchr needle is mostly ascii
- References: <20150526173150.GA26817@domone>
I realized that strchrnul could be optimized further by exploiting
statistical properties of the input. The idea is that the needle is,
most of the time, in the range 0-127.

When we handle bytes 128-255 separately, it allows a considerable
simplification, as we remove the false positive 128 only once instead
of twice as was done previously. A standalone sketch of the bit trick
follows below.
Comments?
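
To make the bit trick concrete, here is a minimal standalone sketch.
It is not part of the patch: contains_zero is copied from the diff
context below, while the values of ones, add and high_bits are my
assumption (the 0x01..., 0x7f... and 0x80... byte patterns), as their
definitions lie outside this diff. A 64-bit little-endian machine is
assumed, matching the patch's use of ffsl.

#include <stdio.h>
#include <string.h>

typedef unsigned long int word_t;

static const word_t ones = ~0UL / 255;		    /* 0x0101...01 */
static const word_t add = 127 * (~0UL / 255);	    /* 0x7f7f...7f */
static const word_t high_bits = 128 * (~0UL / 255); /* 0x8080...80 */

/* Same expression as in the patch context: it flags a zero byte, but
   also (falsely) a byte 128.  */
static word_t
contains_zero (word_t s)
{
  return ((s + add) ^ ~s) & high_bits;
}

int
main (void)
{
  word_t w, mask, cmask = (unsigned char) 'd' * ones;

  /* 'd' at index 4, terminating NUL at index 2.  */
  memcpy (&w, "ab\0cdefg", sizeof w);
  mask = contains_zero (w) | contains_zero (w ^ cmask);
  printf ("first hit at index %d\n", __builtin_ffsl (mask) / 8 - 1);

  /* A byte 128 is a false positive of contains_zero.  */
  memcpy (&w, "ab\200cdefg", sizeof w);
  printf ("byte 128 flagged: %d\n", contains_zero (w) != 0);
  return 0;
}

This prints "first hit at index 2" and "byte 128 flagged: 1": the or
of the two masks finds both the NUL and c, and the 128 false positive
is what the patch's merged expression has to eliminate with the final
^ ~*p.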
* string/strchrnul.c: Exploit that c is usually an ASCII character.
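
The patch below, like the existing code it extends, reads its first
word with an aligned load and then shifts the bytes that precede s
out of the computed mask. A minimal sketch of that idiom, where
ALIGN_DOWN is a simplified stand-in for glibc's PTR_ALIGN_DOWN and
the mask constants are the same assumption as above:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LSIZE (sizeof (unsigned long int))
#define ALIGN_DOWN(p) \
  ((const char *) ((uintptr_t) (p) & ~(uintptr_t) (LSIZE - 1)))

/* High bit set for zero bytes (and, falsely, for bytes 128).  */
static unsigned long int
zero_mask (unsigned long int s)
{
  unsigned long int add = 127 * (~0UL / 255);
  unsigned long int high_bits = 128 * (~0UL / 255);
  return ((s + add) ^ ~s) & high_bits;
}

int
main (void)
{
  /* The union only forces word alignment of buf.  */
  union { char buf[32]; unsigned long int align; } u = { "xxxxxab" };
  const char *s = u.buf + 5;	/* the caller's string, "ab" */
  const char *s_aligned = ALIGN_DOWN (s);
  unsigned long int w, mask;

  memcpy (&w, s_aligned, LSIZE); /* one aligned load, cannot cross a page */
  /* Bytes before s occupy the low-order end of w on little-endian;
     shift their flags out of the mask.  */
  mask = zero_mask (w) >> (8 * (s - s_aligned));
  if (mask)
    printf ("NUL at s[%d]\n", __builtin_ffsl (mask) / 8 - 1);
  return 0;
}

With the layout above this prints "NUL at s[2]", the same index the
patch's return s + ffsl (mask) / 8 - 1 would produce.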
diff --git b/string/strchrnul.c a/string/strchrnul.c
index ea91195..5019773 100644
--- b/string/strchrnul.c
+++ a/string/strchrnul.c
@@ -47,12 +47,24 @@ contains_zero (unsigned long int s)
return ((s + add) ^ ~s) & high_bits;
}
+
+/* The idea is to still use the result of the expression
+     contains_zero (*p) | contains_zero (*p ^ cmask)
+   but optimize its evaluation.  By moving the or inward to compute
+     ((s + add) | ((s ^ cmask) + add))
+   the highest bit is set only for the characters 0, c, 128 and c + 128.
+   As we ensured that the high bit of c is zero, a single ^ ~*p
+   eliminates both 128 and 128 + c instead of doing the xor twice.  */
+
+
static always_inline
int
found_in_long_bytes(char *s, unsigned long int cmask, char **result)
{
const unsigned long int *lptr = (const unsigned long int *) s;
- unsigned long int mask = contains_zero (*lptr) | contains_zero (*lptr ^ cmask);
+ unsigned long int mask = (((*lptr + add) | ((*lptr ^ cmask) + add))
+ ^ ~*lptr) & high_bits;
+
if (mask)
{
*result = s + ffsl (mask) / 8 - 1;
@@ -76,9 +88,30 @@ STRCHRNUL (const char *s_in, int c_in)
const unsigned long int *lptr;
char *s = (char *) s_in;
unsigned char c = (unsigned char) c_in;
- char *r;
+ char *r, *s_aligned;
unsigned long int cmask = c * ones;
+ if (__glibc_unlikely (c > 127))
+ {
+ s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
+ lptr = (const unsigned long int *) s_aligned;
+ mask = (contains_zero (*lptr)
+ | contains_zero (*lptr ^ cmask))
+ >> (8 * (s - s_aligned));
+
+ if (mask)
+ return s + ffsl (mask) / 8 - 1;
+ while (1)
+ {
+ s_aligned += LSIZE;
+ lptr = (const unsigned long int *) s_aligned;
+ mask = (contains_zero (*lptr)
+ | contains_zero (*lptr ^ cmask));
+ if (mask)
+ return s_aligned + ffsl (mask) / 8 - 1;
+ }
+ }
+
#if _STRING_ARCH_unaligned
/* We fetch 32 bytes while not crossing page boundary.
Most strings in practice are of that size and we avoid a loop.
@@ -115,7 +148,7 @@ STRCHRNUL (const char *s_in, int c_in)
/* We need use aligned loads. For first load we read some bytes before
start that we discard by shifting them down. */
- char *s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
+ s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
lptr = (const unsigned long int *) s_aligned;
mask = (contains_zero (*lptr)
| contains_zero (*lptr ^ cmask))