This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH RFC] Pell strcoll initial iteration to improve performance.


Hi Leonhard,

As we try to reduce initialization overhead you should try to peel first
loop iteration. Witch following patch to do it I could have around 10% 
speedup. Where do you have script to create performance difference?

I needed to add initialization of seq1.save_idx and seq1.back_us as gcc 
thought it could be used uninitialized.

Comments?

	* string/strcoll_l.c (STRCOLL): Peel first loop iteration.


diff --git a/string/strcoll_l.c b/string/strcoll_l.c
index 8f1225f..91b36f2 100644
--- a/string/strcoll_l.c
+++ b/string/strcoll_l.c
@@ -311,7 +311,7 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
   assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
   assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
   assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
-
+  int pass = 0;
   int result = 0, rule = 0;
 
   coll_seq seq1, seq2;
@@ -321,7 +321,61 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
   seq2.len = 0;
   seq2.idxmax = 0;
 
-  for (int pass = 0; pass < nrules; ++pass)
+  seq1.save_idx = 0;
+  seq1.back_us = NULL;
+
+  seq1.idxcnt = 0;
+  seq1.idx = 0;
+  seq2.idx = 0;
+  seq1.backw_stop = ~0ul;
+  seq1.backw = ~0ul;
+  seq2.idxcnt = 0;
+  seq2.backw_stop = ~0ul;
+  seq2.backw = ~0ul;
+
+  /* We need the elements of the strings as unsigned values since they
+     are used as indices.  */
+  seq1.us = (const USTRING_TYPE *) s1;
+  seq2.us = (const USTRING_TYPE *) s2;
+
+  /* We assume that if a rule has defined `position' in one section
+     this is true for all of them.  Please note that the localedef programs
+     makes sure that `position' is not used at the first level.  */
+
+  int position = rulesets[rule * nrules + pass] & sort_position;
+
+  get_next_seq (&seq1, nrules, rulesets, weights, table,
+      extra, indirect, pass);
+  get_next_seq (&seq2, nrules, rulesets, weights, table,
+      extra, indirect, pass);
+  /* See whether any or both strings are empty.  */
+  if (seq1.len == 0 || seq2.len == 0)
+    {
+      if (seq1.len == seq2.len)
+        {
+          /* Both strings ended and are equal at this level.  Do a
+	      byte-level comparison to ensure that we don't waste time
+	      going through multiple passes for totally equal strings
+	      before proceeding to subsequent passes.  */
+          if (pass == 0 && encoding == __cet_other &&
+	       STRCMP (s1, s2) == 0)
+	     return result;
+          else
+	     goto next;
+        }
+
+        /* This means one string is shorter than the other.  Find out
+           which one and return an appropriate value.  */
+      return seq1.len == 0 ? -1 : 1;
+    }
+
+  result = do_compare (&seq1, &seq2, position, weights);
+  if (result != 0)
+    return result;
+  goto start_while;
+
+
+  for (pass = 0; pass < nrules; ++pass)
     {
       seq1.idxcnt = 0;
       seq1.idx = 0;
@@ -341,10 +395,11 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
 	 this is true for all of them.  Please note that the localedef programs
 	 makes sure that `position' is not used at the first level.  */
 
-      int position = rulesets[rule * nrules + pass] & sort_position;
+      position = rulesets[rule * nrules + pass] & sort_position;
 
       while (1)
 	{
+start_while:
 	  get_next_seq (&seq1, nrules, rulesets, weights, table,
 				    extra, indirect, pass);
 	  get_next_seq (&seq2, nrules, rulesets, weights, table,
@@ -374,7 +429,7 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
 	  if (result != 0)
 	    return result;
 	}
-
+      next:;
       rule = seq1.rule;
     }
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]