This is the mail archive of the libc-alpha@sourceware.cygnus.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

dcigettext bug fixes



Hi Ulrich,

In the current CVS glibc, dcigettext has a few bugs. I stumbled across them
while testing the automatic charset conversion feature with a UTF-8 message
catalog in GNU hello.

- The plural feature (skipping 'index' strings using repeated
     result += strlen (result) + 1;
  ) works only for hash entries which come first in a hash collision list.
  It does not work for the remaining hash entries which happen to have
  the same primary hash code as another entry. It also doesn't work if the
  message catalog was created without a hash table.

- Similarly, the automatic charset conversion feature via gconv doesn't
  work for the remaining hash entries in collision lists, or when the
  message catalog doesn't have a hash table.

- The plural feature and the charset conversion feature don't work together:
  A single table slot is used for the result, independently of 'index'.
  Therefore after a call to
     _nl_find_msg (domain, msgid, 1)
  a subsequent call to
     _nl_find_msg (domain, msgid, 0)
  will return the cached result of the previous call with a different index,
  which is wrong.

The patch below fixes all this, plus tweaks the following:

- In case of out of memory during conversion, return the unconverted
  translated string, not NULL.

- If !_LIBC and HAVE_ICONV, use iconv instead of __gconv.

It structures the _nl_find_msg function into three steps:

1. Fetch the translation from the catalog, using either the primary hash
   table entry, or the hash collision detection, or binary search.
2. Convert using gconv/iconv.
3. Skip 'index' strings.

I've tested this with GNU hello.

            Bruno


2000-04-23  Bruno Haible  <clisp.cons.org>

	* intl/dcigettext.c (_nl_find_msg): Do the plural treatment and
	character set conversion also in the case the translation was found
	after a hash collision or through binary search. Store the maximum
	index together with the converted message. Use iconv() when available.
	In case of out of memory during conversion, return the unconverted
	translated string, not NULL.

*** intl/dcigettext.c.bak	Mon Feb 28 11:34:03 2000
--- intl/dcigettext.c	Mon Apr 24 10:32:58 2000
***************
*** 626,634 ****
       const char *msgid;
       unsigned long int index;
  {
-   size_t act = 0;
-   size_t top, bottom;
    struct loaded_domain *domain;
  
    if (domain_file->decided == 0)
      _nl_load_domain (domain_file);
--- 633,641 ----
       const char *msgid;
       unsigned long int index;
  {
    struct loaded_domain *domain;
+   size_t act;
+   char *result;
  
    if (domain_file->decided == 0)
      _nl_load_domain (domain_file);
***************
*** 657,759 ****
  		     domain->data + W (domain->must_swap,
  				       domain->orig_tab[nstr - 1].offset)) == 0)
  	{
! 	  /* We found an entry.  If we have to convert the string to use
! 	     a different character set this is the time.  */
! 	  char *result =
! 	    (char *) domain->data + W (domain->must_swap,
! 				       domain->trans_tab[nstr - 1].offset);
! 
! 	  /* Now skip some strings.  How much depends on the index passed
! 	     in.  */
! 	  while (index-- > 0)
! 	    {
! #ifdef _LIBC
! 	      result = __rawmemchr (result, '\0');
! #else
! 	      result = strchr (result, '\0');
! #endif
! 	      /* And skip over the NUL byte.  */
! 	      ++result;
! 	    }
! 
! 	  if (
! #ifdef _LIBC
! 	      domain->conv != (__gconv_t) -1
! #else
! # if HAVE_ICONV
! 	      domain->conv != (iconv_t) -1
! # endif
! #endif
! 	      )
! 	    {
! 	      /* We are supposed to do a conversion.  First allocate an
! 		 appropriate table with the same structure as the hash
! 		 table in the file where we can put the pointers to the
! 		 converted strings in.  */
! 	      if (domain->conv_tab == NULL
! 		  && ((domain->conv_tab = (char **) calloc (domain->hash_size,
! 							    sizeof (char *)))
! 		      == NULL))
! 		/* Mark that we didn't succeed allocating a table.  */
! 		domain->conv_tab = (char **) -1;
! 
! 	      if (domain->conv_tab == (char **) -1)
! 		/* Nothing we can do, no more memory.  */
! 		return NULL;
! 
! 	      if (domain->conv_tab[idx] == NULL)
! 		{
! 		  /* We haven't used this string so far, so it is not
! 		     translated yet.  Do this now.  */
! #ifdef _LIBC
! 		  /* For glibc we use a bit more efficient memory handling.
! 		     We allocate always larger blocks which get used over
! 		     time.  This is faster than many small allocations.   */
! 		  __libc_lock_define_initialized (static, lock)
! 		  static unsigned char *freemem;
! 		  static size_t freemem_size;
! 		  /* Note that we include the NUL byte.  */
! 		  size_t resultlen = strlen (result) + 1;
! 		  const unsigned char *inbuf = result;
! 		  unsigned char *outbuf = freemem;
! 		  size_t written;
! 		  int res;
! 
! 		  __libc_lock_lock (lock);
! 
! 		  while ((res = __gconv (domain->conv,
! 					 &inbuf, inbuf + resultlen,
! 					 &outbuf, outbuf + freemem_size,
! 					 &written)) == __GCONV_OK)
! 		    {
! 		      if (res != __GCONV_FULL_OUTPUT)
! 			goto out;
! 
! 		      /* We must resize the buffer.  */
! 		      freemem_size = MAX (2 * freemem_size, 4064);
! 		      freemem = (char *) malloc (freemem_size);
! 		      if (freemem == NULL)
! 			goto out;
! 
! 		      inbuf = result;
! 		      outbuf = freemem;
! 		    }
! 
! 		  /* We have now in our buffer a converted string.  Put this
! 		     in the hash table  */
! 		  domain->conv_tab[idx] = freemem;
! 		  freemem_size -= outbuf - freemem;
! 		  freemem = outbuf;
! 
! 		out:
! 		  __libc_lock_unlock (lock);
! #endif
! 		}
! 
! 	      result = domain->conv_tab[idx];
! 	    }
! 
! 	  return result;
  	}
  
        while (1)
--- 664,671 ----
  		     domain->data + W (domain->must_swap,
  				       domain->orig_tab[nstr - 1].offset)) == 0)
  	{
! 	  act = nstr - 1;
! 	  goto found;
  	}
  
        while (1)
***************
*** 773,809 ****
  			  domain->data + W (domain->must_swap,
  					    domain->orig_tab[nstr - 1].offset))
  		  == 0))
! 	    return ((char *) domain->data
! 		    + W (domain->must_swap,
! 			 domain->trans_tab[nstr - 1].offset));
  	}
        /* NOTREACHED */
      }
  
!   /* Now we try the default method:  binary search in the sorted
!      array of messages.  */
!   bottom = 0;
!   top = domain->nstrings;
!   while (bottom < top)
      {
!       int cmp_val;
  
!       act = (bottom + top) / 2;
!       cmp_val = strcmp (msgid, (domain->data
! 				+ W (domain->must_swap,
! 				     domain->orig_tab[act].offset)));
!       if (cmp_val < 0)
! 	top = act;
!       else if (cmp_val > 0)
! 	bottom = act + 1;
!       else
! 	break;
      }
  
!   /* If an translation is found return this.  */
!   return bottom >= top ? NULL : ((char *) domain->data
! 				 + W (domain->must_swap,
! 				      domain->trans_tab[act].offset));
  }
  
  
--- 685,883 ----
  			  domain->data + W (domain->must_swap,
  					    domain->orig_tab[nstr - 1].offset))
  		  == 0))
! 	    {
! 	      act = nstr - 1;
! 	      goto found;
! 	    }
  	}
        /* NOTREACHED */
      }
+   else
+     {
+       /* Try the default method:  binary search in the sorted array of
+ 	 messages.  */
+       size_t top, bottom;
+ 
+       bottom = 0;
+       top = domain->nstrings;
+       while (bottom < top)
+ 	{
+ 	  int cmp_val;
+ 
+ 	  act = (bottom + top) / 2;
+ 	  cmp_val = strcmp (msgid, (domain->data
+ 				    + W (domain->must_swap,
+ 					 domain->orig_tab[act].offset)));
+ 	  if (cmp_val < 0)
+ 	    top = act;
+ 	  else if (cmp_val > 0)
+ 	    bottom = act + 1;
+ 	  else
+ 	    goto found;
+ 	}
+       /* No translation was found.  */
+       return NULL;
+     }
  
!  found:
!   /* The translation was found at index ACT.  If we have to convert the
!      string to use a different character set, this is the time.  */
!   result = (char *) domain->data
! 	   + W (domain->must_swap, domain->trans_tab[act].offset);
! 
! #if defined _LIBC || HAVE_ICONV
!   if (
! # ifdef _LIBC
!       domain->conv != (__gconv_t) -1
! # else
! #  if HAVE_ICONV
!       domain->conv != (iconv_t) -1
! #  endif
! # endif
!       )
      {
!       /* We are supposed to do a conversion.  First allocate an
! 	 appropriate table with the same structure as the table
! 	 of translations in the file, where we can put the pointers
! 	 to the converted strings in.
! 	 The is a slight complication with the INDEX: We don't know
! 	 a priori which entries are plural entries. Therefore at any
! 	 moment we can only translate the variants 0 .. INDEX.  */
! 
!       if (domain->conv_tab == NULL
! 	  && ((domain->conv_tab = (char **) calloc (domain->nstrings,
! 						    sizeof (char *)))
! 	      == NULL))
! 	/* Mark that we didn't succeed allocating a table.  */
! 	domain->conv_tab = (char **) -1;
! 
!       if (domain->conv_tab == (char **) -1)
! 	/* Nothing we can do, no more memory.  */
! 	goto converted;
! 
!       if (domain->conv_tab[act] == NULL
! 	  || *(nls_uint32 *) domain->conv_tab[act] < index)
! 	{
! 	  /* We haven't used this string so far, so it is not
! 	     translated yet.  Do this now.  */
! 	  /* We use a bit more efficient memory handling.
! 	     We allocate always larger blocks which get used over
! 	     time.  This is faster than many small allocations.   */
! 	  __libc_lock_define_initialized (static, lock)
! 	  static unsigned char *freemem;
! 	  static size_t freemem_size;
! 
! 	  size_t resultlen;
! 	  const unsigned char *inbuf;
! 	  unsigned char *outbuf;
! 
! 	  /* Note that we translate (index + 1) consecutive strings at
! 	     once, including the final NUL byte.  */
! 	  {
! 	    unsigned long int i = index;
! 	    char *p = result;
! 	    do
! 	      p += strlen (p) + 1;
! 	    while (i-- > 0);
! 	    resultlen = p - result;
! 	  }
  
! 	  inbuf = result;
! 	  outbuf = freemem + 4;
! 
! 	  __libc_lock_lock (lock);
! 
! # ifdef _LIBC
! 	  {
! 	    size_t written;
! 	    int res;
! 
! 	    while ((res = __gconv (domain->conv,
! 				   &inbuf, inbuf + resultlen,
! 				   &outbuf, outbuf + freemem_size,
! 				   &written)) == __GCONV_OK)
! 	      {
! 		if (res != __GCONV_FULL_OUTPUT)
! 		  goto out;
! 
! 		/* We must resize the buffer.  */
! 		freemem_size = MAX (2 * freemem_size, 4064);
! 		freemem = (char *) malloc (freemem_size);
! 		if (freemem == NULL)
! 		  goto out;
! 
! 		inbuf = result;
! 		outbuf = freemem + 4;
! 	      }
! 	  }
! # else
! #  if HAVE_ICONV
! 	  for (;;)
! 	    {
! 	      const char *inptr = (const char *) inbuf;
! 	      size_t inleft = resultlen;
! 	      char *outptr = (char *) outbuf;
! 	      size_t outleft = freemem_size;
! 
! 	      if (iconv (domain->conv, &inptr, &inleft, &outptr, &outleft)
! 		  != (size_t)(-1))
! 		{
! 		  outbuf = (unsigned char *) outptr;
! 		  break;
! 		}
! 	      if (errno != E2BIG)
! 		goto out;
! 
! 	      /* We must resize the buffer.  */
! 	      freemem_size = 2 * freemem_size;
! 	      if (freemem_size < 4064)
! 		freemem_size = 4064;
! 	      freemem = (char *) malloc (freemem_size);
! 	      if (freemem == NULL)
! 		goto out;
! 
! 	      outbuf = freemem + 4;
! 	    }
! #  endif
! # endif
! 
! 	  /* We have now in our buffer a converted string.  Put this
! 	     into the table of conversions.  */
! 	  *(nls_uint32 *) freemem = index;
! 	  domain->conv_tab[act] = freemem;
! 	  /* Shrink freemem, but keep it aligned.  */
! 	  freemem_size -= outbuf - freemem;
! 	  freemem = outbuf;
! 	  freemem += freemem_size & 3;
! 	  freemem_size = freemem_size & ~3;
! 
! 	out:
! 	  __libc_lock_unlock (lock);
! 	}
! 
!       /* Now domain->conv_tab[act] contains the translation of at least
! 	 the variants 0 .. INDEX.  */
!       result = domain->conv_tab[act] + 4;
!     }
! 
!  converted:
!   /* The result string is converted.  */
! 
! #endif /* _LIBC || HAVE_ICONV */
! 
!   /* Now skip some strings.  How much depends on the index passed in.  */
!   while (index-- > 0)
!     {
! #ifdef _LIBC
!       result = __rawmemchr (result, '\0');
! #else
!       result = strchr (result, '\0');
! #endif
!       /* And skip over the NUL byte.  */
!       ++result;
      }
  
!   return result;
  }
  
  

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]