This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH RFC] Pell strcoll initial iteration to improve performance.


Hi OndÅej,

just run a "make benchmark" and see build/benchtest/bench-strcoll.out.

Best,
Leonhard

Am 29.06.2015 um 16:53 schrieb OndÅej BÃlka:
> Hi Leonhard,
> 
> As we try to reduce initialization overhead you should try to peel first
> loop iteration. Witch following patch to do it I could have around 10% 
> speedup. Where do you have script to create performance difference?
> 
> I needed to add initialization of seq1.save_idx and seq1.back_us as gcc 
> thought it could be used uninitialized.
> 
> Comments?
> 
> 	* string/strcoll_l.c (STRCOLL): Peel first loop iteration.
> 
> 
> diff --git a/string/strcoll_l.c b/string/strcoll_l.c
> index 8f1225f..91b36f2 100644
> --- a/string/strcoll_l.c
> +++ b/string/strcoll_l.c
> @@ -311,7 +311,7 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>    assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
>    assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
>    assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
> -
> +  int pass = 0;
>    int result = 0, rule = 0;
>  
>    coll_seq seq1, seq2;
> @@ -321,7 +321,61 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>    seq2.len = 0;
>    seq2.idxmax = 0;
>  
> -  for (int pass = 0; pass < nrules; ++pass)
> +  seq1.save_idx = 0;
> +  seq1.back_us = NULL;
> +
> +  seq1.idxcnt = 0;
> +  seq1.idx = 0;
> +  seq2.idx = 0;
> +  seq1.backw_stop = ~0ul;
> +  seq1.backw = ~0ul;
> +  seq2.idxcnt = 0;
> +  seq2.backw_stop = ~0ul;
> +  seq2.backw = ~0ul;
> +
> +  /* We need the elements of the strings as unsigned values since they
> +     are used as indices.  */
> +  seq1.us = (const USTRING_TYPE *) s1;
> +  seq2.us = (const USTRING_TYPE *) s2;
> +
> +  /* We assume that if a rule has defined `position' in one section
> +     this is true for all of them.  Please note that the localedef programs
> +     makes sure that `position' is not used at the first level.  */
> +
> +  int position = rulesets[rule * nrules + pass] & sort_position;
> +
> +  get_next_seq (&seq1, nrules, rulesets, weights, table,
> +      extra, indirect, pass);
> +  get_next_seq (&seq2, nrules, rulesets, weights, table,
> +      extra, indirect, pass);
> +  /* See whether any or both strings are empty.  */
> +  if (seq1.len == 0 || seq2.len == 0)
> +    {
> +      if (seq1.len == seq2.len)
> +        {
> +          /* Both strings ended and are equal at this level.  Do a
> +	      byte-level comparison to ensure that we don't waste time
> +	      going through multiple passes for totally equal strings
> +	      before proceeding to subsequent passes.  */
> +          if (pass == 0 && encoding == __cet_other &&
> +	       STRCMP (s1, s2) == 0)
> +	     return result;
> +          else
> +	     goto next;
> +        }
> +
> +        /* This means one string is shorter than the other.  Find out
> +           which one and return an appropriate value.  */
> +      return seq1.len == 0 ? -1 : 1;
> +    }
> +
> +  result = do_compare (&seq1, &seq2, position, weights);
> +  if (result != 0)
> +    return result;
> +  goto start_while;
> +
> +
> +  for (pass = 0; pass < nrules; ++pass)
>      {
>        seq1.idxcnt = 0;
>        seq1.idx = 0;
> @@ -341,10 +395,11 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>  	 this is true for all of them.  Please note that the localedef programs
>  	 makes sure that `position' is not used at the first level.  */
>  
> -      int position = rulesets[rule * nrules + pass] & sort_position;
> +      position = rulesets[rule * nrules + pass] & sort_position;
>  
>        while (1)
>  	{
> +start_while:
>  	  get_next_seq (&seq1, nrules, rulesets, weights, table,
>  				    extra, indirect, pass);
>  	  get_next_seq (&seq2, nrules, rulesets, weights, table,
> @@ -374,7 +429,7 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>  	  if (result != 0)
>  	    return result;
>  	}
> -
> +      next:;
>        rule = seq1.rule;
>      }
>  
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]