This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[ping][PATCH 1/2 v1.1][BZ #14547] Fix CVE-2012-4412


Ping!

On Wed, Aug 21, 2013 at 08:39:12PM +0530, Siddhesh Poyarekar wrote:
> Hi,
> 
> Here's an updated patch with some small changes based on some bugs I
> found in my patch.  Patch 2/2 will have the test case to verify that
> this does not regress.  It runs for 5 minutes and causes a lot of pain
> to the build system, so I've kept it as an xtests like Andreas Schwab
> suggested.
> 
> No regressions in the testsuite (with check and xcheck) as tested on
> x86_64 and x86.
> 
> Siddhesh
> 
> 	[BZ #14547]
> 	* string/strcoll_l.c (coll_seq): New members rule, idx,
> 	save_idx and back_us.
> 	(get_next_seq_nocache): New function.
> 	(do_compare_nocache): New function.
> 	(STRCOLL): Use get_next_seq_nocache and do_compare_nocache
> 	when malloc fails.
> 
> diff --git a/string/strcoll_l.c b/string/strcoll_l.c
> index 50ed84d..eb042ff 100644
> --- a/string/strcoll_l.c
> +++ b/string/strcoll_l.c
> @@ -45,7 +45,7 @@
>  typedef struct
>  {
>    int len;			/* Length of the current sequence.  */
> -  int val;			/* Position of the sequence relative to the
> +  size_t val;			/* Position of the sequence relative to the
>  				   previous non-ignored sequence.  */
>    size_t idxnow;		/* Current index in sequences.  */
>    size_t idxmax;		/* Maximum index in sequences.  */
> @@ -55,6 +55,12 @@ typedef struct
>    const USTRING_TYPE *us;	/* The string.  */
>    int32_t *idxarr;		/* Array to cache weight indices.  */
>    unsigned char *rulearr;	/* Array to cache rules.  */
> +  unsigned char rule;		/* Saved rule for the first sequence.  */
> +  int32_t idx;			/* Index to weight of the current sequence.  */
> +  int32_t save_idx;		/* Save looked up index of a forward
> +				   sequence after the last backward
> +				   sequence.  */
> +  const USTRING_TYPE *back_us;	/* Beginning of the backward sequence.  */
>  } coll_seq;
>  
>  /* Get next sequence.  The weight indices are cached, so we don't need to
> @@ -64,7 +70,7 @@ get_next_seq_cached (coll_seq *seq, int nrules, int pass,
>  		     const unsigned char *rulesets,
>  		     const USTRING_TYPE *weights)
>  {
> -  int val = seq->val = 0;
> +  size_t val = seq->val = 0;
>    int len = seq->len;
>    size_t backw_stop = seq->backw_stop;
>    size_t backw = seq->backw;
> @@ -146,7 +152,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
>  	      const USTRING_TYPE *extra, const int32_t *indirect)
>  {
>  #include WEIGHT_H
> -  int val = seq->val = 0;
> +  size_t val = seq->val = 0;
>    int len = seq->len;
>    size_t backw_stop = seq->backw_stop;
>    size_t backw = seq->backw;
> @@ -162,7 +168,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
>        ++val;
>        if (backw_stop != ~0ul)
>  	{
> -	  /* The is something pushed.  */
> +	  /* There is something pushed.  */
>  	  if (backw == backw_stop)
>  	    {
>  	      /* The last pushed character was handled.  Continue
> @@ -227,15 +233,199 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
>    seq->us = us;
>  }
>  
> -/* Compare two sequences.  */
> +/* Get next sequence.  Traverse the string as required.  This function does not
> +   set or use any index or rule cache.  */
> +static void
> +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
> +		      const USTRING_TYPE *weights, const int32_t *table,
> +		      const USTRING_TYPE *extra, const int32_t *indirect,
> +		      int pass)
> +{
> +#include WEIGHT_H
> +  size_t val = seq->val = 0;
> +  int len = seq->len;
> +  size_t backw_stop = seq->backw_stop;
> +  size_t backw = seq->backw;
> +  size_t idxcnt = seq->idxcnt;
> +  size_t idxmax = seq->idxmax;
> +  int32_t idx = seq->idx;
> +  const USTRING_TYPE *us = seq->us;
> +
> +  while (len == 0)
> +    {
> +      ++val;
> +      if (backw_stop != ~0ul)
> +	{
> +	  /* There is something pushed.  */
> +	  if (backw == backw_stop)
> +	    {
> +	      /* The last pushed character was handled.  Continue
> +		 with forward characters.  */
> +	      if (idxcnt < idxmax)
> +		{
> +		  idx = seq->save_idx;
> +		  backw_stop = ~0ul;
> +		}
> +	      else
> +		{
> +		  /* Nothing anymore.  The backward sequence ended with
> +		     the last sequence in the string.  Note that len is
> +		     still zero.  */
> +		  idx = 0;
> +		  break;
> +	        }
> +	    }
> +	  else
> +	    {
> +	      /* XXX Traverse BACKW sequences from the beginning of
> +		 BACKW_STOP to get the next sequence.  Is ther a quicker way
> +	         to do this?  */
> +	      size_t i = backw_stop;
> +	      us = seq->back_us;
> +	      while (i < backw)
> +		{
> +		  int32_t tmp = findidx (&us, -1);
> +		  idx = tmp & 0xffffff;
> +		  i++;
> +		}
> +	      --backw;
> +	      us = seq->us;
> +	    }
> +	}
> +      else
> +	{
> +	  backw_stop = idxmax;
> +	  int32_t prev_idx = idx;
> +
> +	  while (*us != L('\0'))
> +	    {
> +	      int32_t tmp = findidx (&us, -1);
> +	      unsigned char rule = tmp >> 24;
> +	      prev_idx = idx;
> +	      idx = tmp & 0xffffff;
> +	      idxcnt = idxmax++;
> +
> +	      /* Save the rule for the first sequence.  */
> +	      if (__glibc_unlikely (idxcnt == 0))
> +	        seq->rule = rule;
> +
> +	      if ((rulesets[rule * nrules + pass]
> +		   & sort_backward) == 0)
> +		/* No more backward characters to push.  */
> +		break;
> +	      ++idxcnt;
> +	    }
> +
> +	  if (backw_stop >= idxcnt)
> +	    {
> +	      /* No sequence at all or just one.  */
> +	      if (idxcnt == idxmax || backw_stop > idxcnt)
> +		/* Note that len is still zero.  */
> +		break;
> +
> +	      backw_stop = ~0ul;
> +	    }
> +	  else
> +	    {
> +	      /* We pushed backward sequences.  If the stream ended with the
> +		 backward sequence, then we process the last sequence we
> +		 found.  Otherwise we process the sequence before the last
> +		 one since the last one was a forward sequence.  */
> +	      seq->back_us = seq->us;
> +	      seq->us = us;
> +	      backw = idxcnt;
> +	      if (idxmax > idxcnt)
> +		{
> +		  backw--;
> +		  seq->save_idx = idx;
> +		  idx = prev_idx;
> +		}
> +	      if (backw > backw_stop)
> +		backw--;
> +	    }
> +	}
> +
> +      len = weights[idx++];
> +      /* Skip over indices of previous levels.  */
> +      for (int i = 0; i < pass; i++)
> +	{
> +	  idx += len;
> +	  len = weights[idx];
> +	  idx++;
> +	}
> +    }
> +
> +  /* Update the structure.  */
> +  seq->val = val;
> +  seq->len = len;
> +  seq->backw_stop = backw_stop;
> +  seq->backw = backw;
> +  seq->idxcnt = idxcnt;
> +  seq->idxmax = idxmax;
> +  seq->us = us;
> +  seq->idx = idx;
> +}
> +
> +/* Compare two sequences.  This version does not use the index and rules
> +   cache.  */
> +static int
> +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,
> +		    const USTRING_TYPE *weights)
> +{
> +  int seq1len = seq1->len;
> +  int seq2len = seq2->len;
> +  size_t val1 = seq1->val;
> +  size_t val2 = seq2->val;
> +  int idx1 = seq1->idx;
> +  int idx2 = seq2->idx;
> +  int result = 0;
> +
> +  /* Test for position if necessary.  */
> +  if (position && val1 != val2)
> +    {
> +      result = val1 > val2 ? 1 : -1;
> +      goto out;
> +    }
> +
> +  /* Compare the two sequences.  */
> +  do
> +    {
> +      if (weights[idx1] != weights[idx2])
> +	{
> +	  /* The sequences differ.  */
> +	  result = weights[idx1] - weights[idx2];
> +	  goto out;
> +	}
> +
> +      /* Increment the offsets.  */
> +      ++idx1;
> +      ++idx2;
> +
> +      --seq1len;
> +      --seq2len;
> +    }
> +  while (seq1len > 0 && seq2len > 0);
> +
> +  if (position && seq1len != seq2len)
> +    result = seq1len - seq2len;
> +
> +out:
> +  seq1->len = seq1len;
> +  seq2->len = seq2len;
> +  seq1->idx = idx1;
> +  seq2->idx = idx2;
> +  return result;
> +}
> +
> +/* Compare two sequences using the index cache.  */
>  static int
>  do_compare (coll_seq *seq1, coll_seq *seq2, int position,
>  	    const USTRING_TYPE *weights)
>  {
>    int seq1len = seq1->len;
>    int seq2len = seq2->len;
> -  int val1 = seq1->val;
> -  int val2 = seq2->val;
> +  size_t val1 = seq1->val;
> +  size_t val2 = seq2->val;
>    int32_t *idx1arr = seq1->idxarr;
>    int32_t *idx2arr = seq2->idxarr;
>    int idx1now = seq1->idxnow;
> @@ -245,7 +435,7 @@ do_compare (coll_seq *seq1, coll_seq *seq2, int position,
>    /* Test for position if necessary.  */
>    if (position && val1 != val2)
>      {
> -      result = val1 - val2;
> +      result = val1 > val2 ? 1 : -1;
>        goto out;
>      }
>  
> @@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>    memset (&seq1, 0, sizeof (seq1));
>    seq2 = seq1;
>  
> -  /* We need the elements of the strings as unsigned values since they
> -     are used as indices.  */
> -  seq1.us = (const USTRING_TYPE *) s1;
> -  seq2.us = (const USTRING_TYPE *) s2;
> -
>    if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
>      {
>        seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
> -      seq2.idxarr = &seq1.idxarr[s1len];
> -      seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
> -      seq2.rulearr = &seq1.rulearr[s1len];
> -
> -      if (seq1.idxarr == NULL)
> -	/* No memory.  Well, go with the stack then.
> -
> -	   XXX Once this implementation is stable we will handle this
> -	   differently.  Instead of precomputing the indices we will
> -	   do this in time.  This means, though, that this happens for
> -	   every pass again.  */
> -	goto try_stack;
> -      use_malloc = true;
> +
> +      /* If we failed to allocate memory, we leave everything as NULL so that
> +	 we use the nocache version of traversal and comparison functions.  */
> +      if (seq1.idxarr != NULL)
> +	{
> +	  seq2.idxarr = &seq1.idxarr[s1len];
> +	  seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
> +	  seq2.rulearr = &seq1.rulearr[s1len];
> +	  use_malloc = true;
> +	}
>      }
>    else
>      {
> -    try_stack:
>        seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
>        seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
>        seq1.rulearr = (unsigned char *) alloca (s1len);
>        seq2.rulearr = (unsigned char *) alloca (s2len);
>      }
>  
> -  seq1.rulearr[0] = 0;
> +  int rule = 0;
>  
>    /* Cache values in the first pass and if needed, use them in subsequent
>       passes.  */
>    for (int pass = 0; pass < nrules; ++pass)
>      {
>        seq1.idxcnt = 0;
> +      seq1.idx = 0;
> +      seq2.idx = 0;
>        seq1.backw_stop = ~0ul;
>        seq1.backw = ~0ul;
>        seq2.idxcnt = 0;
>        seq2.backw_stop = ~0ul;
>        seq2.backw = ~0ul;
>  
> +      /* We need the elements of the strings as unsigned values since they
> +	 are used as indices.  */
> +      seq1.us = (const USTRING_TYPE *) s1;
> +      seq2.us = (const USTRING_TYPE *) s2;
> +
>        /* We assume that if a rule has defined `position' in one section
>  	 this is true for all of them.  */
> -      int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;
> +      int position = rulesets[rule * nrules + pass] & sort_position;
>  
>        while (1)
>  	{
> -	  if (pass == 0)
> +	  if (__glibc_unlikely (seq1.idxarr == NULL))
> +	    {
> +	      get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
> +				    extra, indirect, pass);
> +	      get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
> +				    extra, indirect, pass);
> +	    }
> +	  else if (pass == 0)
>  	    {
>  	      get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
>  			    indirect);
> @@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
>  	      goto free_and_return;
>  	    }
>  
> -	  result = do_compare (&seq1, &seq2, position, weights);
> +	  if (__glibc_unlikely (seq1.idxarr == NULL))
> +	    result = do_compare_nocache (&seq1, &seq2, position, weights);
> +	  else
> +	    result = do_compare (&seq1, &seq2, position, weights);
>  	  if (result != 0)
>  	    goto free_and_return;
>  	}
> +
> +      if (__glibc_likely (seq1.rulearr != NULL))
> +	rule = seq1.rulearr[0];
> +      else
> +	rule = seq1.rule;
>      }
>  
>    /* Free the memory if needed.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]