This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: RFC: IDN support in getaddrinfo().

From: Simon Josefsson <jas at extundo dot com>
To: libc-alpha at sources dot redhat dot com
Date: Mon, 08 Mar 2004 00:39:57 +0100
Subject: Re: RFC: IDN support in getaddrinfo().
References: <ilu65ivph18.fsf@extundo.com> <3FC41F79.3070104@redhat.com><ilu1xrvv65j.fsf@latte.josefsson.org> <3FC47A3B.3060602@redhat.com><ilu8ym3tnyo.fsf@latte.josefsson.org> <3FC4E691.3080605@redhat.com><iluekvvq97l.fsf@latte.josefsson.org> <3FC4F244.5020007@redhat.com><iluwu9notm5.fsf@latte.josefsson.org><ilu3ccaq50i.fsf@latte.josefsson.org> <3FC50672.6080002@redhat.com><iluk75molsr.fsf@latte.josefsson.org>
Here's a patch that implements the dlopen approach for loading libidn.
Not exactly the way I originally intended, but it seems rather simple.

What do you think?

Note that someone familiar with how iconv should be called from within
libc will have to fix toutf8.c.  IIRC, Ulrich said that he could look
into that.  (Simply calling iconv directly crashes, I believe.)
Please keep in mind that I'd like to merge the changes back into
libidn, which should work standalone from libc, so keep the changes
minimal and within #ifdef _LIBC.

The patch does not implement my AI_CANONIDN idea, but I don't think it
is critical in the first version.  See
http://savannah.gnu.org/cgi-bin/viewcvs/libidn/libidn/libc/getaddrinfo-idn.txt?rev=1.7&content-type=text/vnd.viewcvs-markup
for more information on my entire approach.

Thanks,
Simon

2004-03-07  Simon Josefsson  <jas@extundo.com>

	* libidn/Banner: New.
	* libidn/Makefile: Likewise.
	* libidn/Version: Likewise.
	* libidn/configure: Likewise.
	* libidn/idn-stub.c: Likewise.
	* libidn/gunibreak.h: Copied from Libidn.
	* libidn/gunicomp.h: Likewise.
	* libidn/gunidecomp.h: Likewise.
	* libidn/idna.h: Likewise.
	* libidn/idna.c: Likewise.
	* libidn/nfkc.c: Likewise.
	* libidn/profiles.c: Likewise.
	* libidn/punycode.c: Likewise.
	* libidn/punycode.h: Likewise.
	* libidn/rfc3454.c: Likewise.
	* libidn/stringprep.c: Likewise.
	* libidn/stringprep.h: Likewise.
	* libidn/toutf8.c: Copied from Libidn and modified to disable
	iconv code, pending glibc specific rewrite by GLIBC team.
	* resolv/netdb.h [__USE_GNU]: Add new AI_IDN ai_flags for addrinfo.
	[__USE_GNU]: Add new error code EAI_IDN_ENCODE for getaddrinfo.
	* sysdeps/posix/getaddrinfo.c: Add prototype for idna_to_ascii_lz
	and define IDNA_SUCCESS.
	(gaih_inet): If ai_flags have AI_IDN, invoke idna_to_ascii_lz on
	name.
	(getaddrinfo): Fix EAI_BADFLAGS test to include AI_IDN.

Patch from 'cvs diff' against existing files in CVS:

Index: resolv/netdb.h
===================================================================
RCS file: /cvs/glibc/libc/resolv/netdb.h,v
retrieving revision 1.42
diff -u -p -r1.42 netdb.h
--- resolv/netdb.h	24 Apr 2003 23:40:02 -0000	1.42
+++ resolv/netdb.h	7 Mar 2004 23:27:30 -0000
@@ -1,4 +1,4 @@
-  /* Copyright (C) 1996-2002, 2003 Free Software Foundation, Inc.
+  /* Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -573,6 +573,11 @@ struct gaicb
 # define AI_ALL		0x0010	/* Return IPv4 mapped and IPv6 addresses.  */
 # define AI_ADDRCONFIG	0x0020	/* Use configuration of this host to choose
 				   returned address type..  */
+# ifdef __USE_GNU
+#  define AI_IDN	0x0040	/* IDN encode input (assuming it is encoded
+				   in the current locale's character set)
+				   before looking it up. */
+# endif
 
 /* Error values for `getaddrinfo' function.  */
 # define EAI_BADFLAGS	  -1	/* Invalid value for `ai_flags' field.  */
@@ -592,6 +597,7 @@ struct gaicb
 #  define EAI_NOTCANCELED -102	/* Request not canceled.  */
 #  define EAI_ALLDONE	  -103	/* All requests done.  */
 #  define EAI_INTR	  -104	/* Interrupted by a signal.  */
+#  define EAI_IDN_ENCODE  -105	/* IDN encoding failed.  */
 # endif
 
 # define NI_MAXHOST      1025
Index: sysdeps/posix/getaddrinfo.c
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/posix/getaddrinfo.c,v
retrieving revision 1.56
diff -u -p -r1.56 getaddrinfo.c
--- sysdeps/posix/getaddrinfo.c	23 Feb 2004 19:52:58 -0000	1.56
+++ sysdeps/posix/getaddrinfo.c	7 Mar 2004 23:27:31 -0000
@@ -55,6 +55,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
 #include <nsswitch.h>
 #include <not-cancel.h>
 
+extern int idna_to_ascii_lz (const char *input, char **output, int flags);
+#define IDNA_SUCCESS 0
+
 #define GAIH_OKIFUNSPEC 0x0100
 #define GAIH_EAI        ~(GAIH_OKIFUNSPEC)
 
@@ -539,6 +542,16 @@ gaih_inet (const char *name, const struc
       at->scopeid = 0;
       at->next = NULL;
 
+      if (req->ai_flags & AI_IDN)
+	{
+	  char *p = NULL;
+	  rc = idna_to_ascii_lz (name, &p, 0);
+	  if (rc != IDNA_SUCCESS)
+	    return -EAI_IDN_ENCODE;
+	  name = strdupa (p);
+	  free (p);
+	}
+
       if (inet_pton (AF_INET, name, at->addr) > 0)
 	{
 	  if (req->ai_family == AF_UNSPEC || req->ai_family == AF_INET)
@@ -1252,7 +1265,7 @@ getaddrinfo (const char *name, const cha
 
   if (hints->ai_flags
       & ~(AI_PASSIVE|AI_CANONNAME|AI_NUMERICHOST|AI_ADDRCONFIG|AI_V4MAPPED
-	  |AI_ALL))
+	  |AI_ALL|AI_IDN))
     return EAI_BADFLAGS;
 
   if ((hints->ai_flags & AI_CANONNAME) && name == NULL)

New files in new directory libidn/ (except guni*.h and rfc3454.c, they
are very large, which can be copied from the latest libidn release):

diff -Nur t/Banner libidn/Banner
--- t/Banner	1970-01-01 01:00:00.000000000 +0100
+++ libidn/Banner	2003-11-26 19:19:07.000000000 +0100
@@ -0,0 +1 @@
+GNU Libidn by Simon Josefsson
diff -Nur t/configure libidn/configure
--- t/configure	1970-01-01 01:00:00.000000000 +0100
+++ libidn/configure	2003-11-26 19:19:03.000000000 +0100
@@ -0,0 +1,2 @@
+# This is only to keep the GNU C library configure mechanism happy.
+exit 0
diff -Nur t/idna.c libidn/idna.c
--- t/idna.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/idna.c	2004-03-07 21:55:47.000000000 +0100
@@ -0,0 +1,797 @@
+/* idna.c	Convert to or from IDN strings.
+ * Copyright (C) 2002, 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stringprep.h>
+#include <punycode.h>
+
+#include "idna.h"
+
+#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||	\
+		 (c) == 0xFF0E || (c) == 0xFF61)
+
+/* Core functions */
+
+/**
+ * idna_to_ascii_4i
+ * @in: input array with unicode code points.
+ * @inlen: length of input array with unicode code points.
+ * @out: output zero terminated string that must have room for at
+ *       least 63 characters plus the terminating zero.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * The ToASCII operation takes a sequence of Unicode code points that make
+ * up one label and transforms it into a sequence of code points in the
+ * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
+ * resulting sequence are equivalent labels.
+ *
+ * It is important to note that the ToASCII operation can fail. ToASCII
+ * fails if any step of it fails. If any step of the ToASCII operation
+ * fails on any label in a domain name, that domain name MUST NOT be used
+ * as an internationalized domain name. The method for dealing with this
+ * failure is application-specific.
+ *
+ * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
+ * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
+ * sequence of ASCII code points or a failure condition.
+ *
+ * ToASCII never alters a sequence of code points that are all in the ASCII
+ * range to begin with (although it could fail). Applying the ToASCII
+ * operation multiple times has exactly the same effect as applying it just
+ * once.
+ *
+ * Return value: Returns 0 on success, or an error code.
+ */
+int
+idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
+{
+  size_t len, outlen;
+  uint32_t *src;		/* XXX don't need to copy data? */
+  int rc;
+
+  /*
+   * ToASCII consists of the following steps:
+   *
+   * 1. If all code points in the sequence are in the ASCII range (0..7F)
+   * then skip to step 3.
+   */
+  {
+    size_t i;
+    int inasciirange = 1;
+
+    for (i = 0; i < inlen; i++)
+      if (in[i] > 0x7F)
+	inasciirange = 0;
+    if (inasciirange)
+      {
+	src = malloc (sizeof (in[0]) * (inlen + 1));
+	if (src == NULL)
+	  return IDNA_MALLOC_ERROR;
+
+	memcpy (src, in, sizeof (in[0]) * inlen);
+	src[inlen] = 0;
+
+	goto step3;
+      }
+  }
+
+  /*
+   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
+   * an error. The AllowUnassigned flag is used in [NAMEPREP].
+   */
+  {
+    char *p, *newp;
+
+    p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
+    if (p == NULL)
+      return IDNA_MALLOC_ERROR;
+
+    len = strlen (p);
+    do
+      {
+	len = 2 * len + 10;	/* XXX better guess? */
+	newp = realloc (p, len);
+	if (newp == NULL)
+	  {
+	    free (p);		/* realloc failure leaves p allocated */
+	    return IDNA_MALLOC_ERROR;
+	  }
+	p = newp;
+
+	if (flags & IDNA_ALLOW_UNASSIGNED)
+	  rc = stringprep_nameprep (p, len);
+	else
+	  rc = stringprep_nameprep_no_unassigned (p, len);
+      }
+    while (rc == STRINGPREP_TOO_SMALL_BUFFER);
+
+    if (rc != STRINGPREP_OK)
+      {
+	free (p);
+	return IDNA_STRINGPREP_ERROR;
+      }
+
+    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
+    free (p);
+    if (src == NULL)		/* steps 3 onwards dereference src */
+      return IDNA_MALLOC_ERROR;
+  }
+
+step3:
+  /*
+   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
+   *
+   * (a) Verify the absence of non-LDH ASCII code points; that is,
+   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
+   *
+   * (b) Verify the absence of leading and trailing hyphen-minus;
+   * that is, the absence of U+002D at the beginning and end of
+   * the sequence.
+   */
+  if (flags & IDNA_USE_STD3_ASCII_RULES)
+    {
+      size_t i;
+
+      for (i = 0; src[i]; i++)
+	if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
+	    (src[i] >= 0x3A && src[i] <= 0x40) ||
+	    (src[i] >= 0x5B && src[i] <= 0x60) ||
+	    (src[i] >= 0x7B && src[i] <= 0x7F))
+	  {
+	    free (src);
+	    return IDNA_CONTAINS_NON_LDH;
+	  }
+
+      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
+	{
+	  free (src);
+	  return IDNA_CONTAINS_MINUS;
+	}
+    }
+
+  /*
+   * 4. If all code points in the sequence are in the ASCII range
+   * (0..7F), then skip to step 8.
+   */
+  {
+    size_t i;
+    int inasciirange = 1;
+
+    for (i = 0; src[i]; i++)
+      {
+	if (src[i] > 0x7F)
+	  inasciirange = 0;
+	/* copy string to output buffer if we are about to skip to step8 */
+	if (i < 64)
+	  out[i] = src[i];
+      }
+    if (i < 64)
+      out[i] = '\0';
+    else if (inasciirange)
+      {
+	/* No room for the terminator: step 8 must not strlen (out).  */
+	free (src);
+	return IDNA_INVALID_LENGTH;
+      }
+    if (inasciirange)
+      goto step8;
+  }
+
+  /*
+   * 5. Verify that the sequence does NOT begin with the ACE prefix.
+   */
+  {
+    size_t i;
+    int match = 1;
+
+    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
+      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
+	match = 0;
+    if (match)
+      {
+	free (src);
+	return IDNA_CONTAINS_ACE_PREFIX;
+      }
+  }
+
+  /*
+   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
+   * and fail if there is an error.
+   */
+  for (len = 0; src[len]; len++)
+    ;
+  outlen = 63 - strlen (IDNA_ACE_PREFIX);
+  rc = punycode_encode (len, src, NULL,
+			&outlen, &out[strlen (IDNA_ACE_PREFIX)]);
+  if (rc != PUNYCODE_SUCCESS)
+    {
+      free (src);
+      return IDNA_PUNYCODE_ERROR;
+    }
+  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
+
+  /*
+   * 7. Prepend the ACE prefix.
+   */
+  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
+
+  /*
+   * 8. Verify that the number of code points is in the range 1 to 63
+   * inclusive (0 is excluded).
+   */
+
+step8:
+  free (src);
+  if (strlen (out) < 1 || strlen (out) > 63)
+    return IDNA_INVALID_LENGTH;
+
+  return IDNA_SUCCESS;
+}
+
+/* ToUnicode().  May realloc() the buffer at *utf8inp; the pointer is
+   updated in place so that the caller can still free it.  */
+static int
+idna_to_unicode_internal (char **utf8inp,
+			  uint32_t * out, size_t * outlen, int flags)
+{
+  int rc;
+  char tmpout[64];
+  char *utf8in = *utf8inp;
+  size_t utf8len = strlen (utf8in) + 1;
+  size_t addlen = 0;
+
+  /*
+   * ToUnicode consists of the following steps:
+   *
+   * 1. If the sequence contains any code points outside the ASCII range
+   * (0..7F) then proceed to step 2, otherwise skip to step 3.
+   */
+
+  {
+    size_t i;
+    int inasciirange;
+
+    inasciirange = 1;
+    for (i = 0; utf8in[i]; i++)
+      if (utf8in[i] & ~0x7F)
+	inasciirange = 0;
+    if (inasciirange)
+      goto step3;
+  }
+
+  /*
+   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
+   * error. (If step 3 of ToASCII is also performed here, it will not
+   * affect the overall behavior of ToUnicode, but it is not
+   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
+   */
+  do
+    {
+      char *newp = realloc (utf8in, utf8len + addlen);
+      if (!newp)
+	return IDNA_MALLOC_ERROR;
+      *utf8inp = utf8in = newp;	/* keep the caller's pointer valid */
+      if (flags & IDNA_ALLOW_UNASSIGNED)
+	rc = stringprep_nameprep (utf8in, utf8len + addlen);
+      else
+	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
+      addlen += 1;
+    }
+  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
+
+  if (rc != STRINGPREP_OK)
+    return IDNA_STRINGPREP_ERROR;
+
+  /* 3. Verify that the sequence begins with the ACE prefix, and save a
+   * copy of the sequence.
+   */
+
+step3:
+  if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
+    return IDNA_NO_ACE_PREFIX;
+
+  /* 4. Remove the ACE prefix.  */
+
+  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
+	   strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
+
+  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
+   * and fail if there is an error. Save a copy of the result of
+   * this step.
+   */
+
+  (*outlen)--;			/* reserve one for the zero */
+
+  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
+  if (rc != PUNYCODE_SUCCESS)
+    return IDNA_PUNYCODE_ERROR;
+
+  out[*outlen] = 0;		/* add zero */
+
+  /* 6. Apply ToASCII.  */
+
+  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
+  if (rc != IDNA_SUCCESS)
+    return rc;
+
+  /* 7. Verify that the result of step 6 matches the saved copy from
+   * step 3, using a case-insensitive ASCII comparison.
+   */
+
+  if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
+    return IDNA_ROUNDTRIP_VERIFY_ERROR;
+
+  /* 8. Return the saved copy from step 5.  */
+
+  return IDNA_SUCCESS;
+}
+
+/**
+ * idna_to_unicode_44i
+ * @in: input array with unicode code points.
+ * @inlen: length of input array with unicode code points.
+ * @out: output array with unicode code points.
+ * @outlen: on input, maximum size of output array with unicode code points,
+ *          on exit, actual size of output array with unicode code points.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * The ToUnicode operation takes a sequence of Unicode code points
+ * that make up one label and returns a sequence of Unicode code
+ * points. If the input sequence is a label in ACE form, then the
+ * result is an equivalent internationalized label that is not in ACE
+ * form, otherwise the original sequence is returned unaltered.
+ *
+ * ToUnicode never fails. If any step fails, then the original input
+ * sequence is returned immediately in that step.
+ *
+ * The Punycode decoder can never output more code points than it
+ * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
+ * the number of octets needed to represent a sequence of code points
+ * depends on the particular character encoding used.
+ *
+ * The inputs to ToUnicode are a sequence of code points, the
+ * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
+ * ToUnicode is always a sequence of Unicode code points.
+ *
+ * Return value: Returns error condition, but it must only be used for
+ *               debugging purposes.  The output buffer is always
+ *               guaranteed to contain the correct data according to
+ *               the specification (sans malloc induced errors).  NB!
+ *               This means that you normally ignore the return code
+ *               from this function, as checking it means breaking the
+ *               standard.
+ */
+int
+idna_to_unicode_44i (const uint32_t * in, size_t inlen,
+		     uint32_t * out, size_t * outlen, int flags)
+{
+  int rc;
+  size_t outlensave = *outlen;
+  char *p;
+
+  p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
+  if (p == NULL)
+    return IDNA_MALLOC_ERROR;
+
+  rc = idna_to_unicode_internal (&p, out, outlen, flags);
+  if (rc != IDNA_SUCCESS)
+    {
+      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
+					 inlen : outlensave));
+      *outlen = inlen;
+    }
+
+  free (p);
+
+  return rc;
+}
+
+/* Wrappers that handle several labels */
+
+/**
+ * idna_to_ascii_4z:
+ * @input: zero terminated input Unicode string.
+ * @output: pointer to newly allocated output string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert UCS-4 domain name to ASCII string.  The domain name may
+ * contain several labels, separated by dots.  The output buffer must
+ * be deallocated by the caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
+{
+  const uint32_t *start = input;
+  const uint32_t *end = input;
+  char buf[64];
+  char *out = NULL;
+  int rc;
+
+  /* 1) Whenever dots are used as label separators, the following
+     characters MUST be recognized as dots: U+002E (full stop),
+     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
+     U+FF61 (halfwidth ideographic full stop). */
+  if (input[0] == 0)
+    {
+      /* Handle implicit zero-length root label. */
+      *output = malloc (1);
+      if (!*output)
+	return IDNA_MALLOC_ERROR;
+      strcpy (*output, "");
+      return IDNA_SUCCESS;
+    }
+
+  if (DOTP (input[0]) && input[1] == 0)
+    {
+      /* Handle explicit zero-length root label. */
+      *output = malloc (2);
+      if (!*output)
+	return IDNA_MALLOC_ERROR;
+      strcpy (*output, ".");
+      return IDNA_SUCCESS;
+    }
+
+  *output = NULL;
+  do
+    {
+      for (end = start; *end && !DOTP (*end); end++)
+	;
+
+      if (*end == '\0' && start == end)
+	{
+	  /* Handle explicit zero-length root label. */
+	  buf[0] = '\0';
+	}
+      else
+	{
+	  rc = idna_to_ascii_4i (start, end - start, buf, flags);
+	  if (rc != IDNA_SUCCESS)
+	    {
+	      free (out);	/* drop the labels converted so far */
+	      return rc;
+	    }
+	}
+
+      {
+	/* realloc (NULL, n) allocates, so one path serves every label.  */
+	size_t used = out ? strlen (out) + 1 : 0;	/* old text + dot */
+	char *grown = realloc (out, used + strlen (buf) + 1);
+	if (!grown)
+	  {
+	    free (out);
+	    return IDNA_MALLOC_ERROR;
+	  }
+	if (used)
+	  strcat (grown, ".");
+	else
+	  grown[0] = '\0';
+	strcat (grown, buf);
+	out = grown;
+      }
+
+      start = end + 1;
+    }
+  while (*end);
+
+  *output = out;
+  return IDNA_SUCCESS;
+}
+
+/**
+ * idna_to_ascii_8z:
+ * @input: zero terminated input UTF-8 string.
+ * @output: pointer to newly allocated output string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert UTF-8 domain name to ASCII string.  The domain name may
+ * contain several labels, separated by dots.  The output buffer must
+ * be deallocated by the caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_ascii_8z (const char *input, char **output, int flags)
+{
+  size_t n_points;		/* passed to the decoder; not read here */
+  uint32_t *points;
+  int status;
+
+  /* Decode the zero-terminated UTF-8 input into code points.  */
+  points = stringprep_utf8_to_ucs4 (input, -1, &n_points);
+  if (points == NULL)
+    return IDNA_ICONV_ERROR;
+
+  /* The per-label work happens in the UCS-4 entry point.  */
+  status = idna_to_ascii_4z (points, output, flags);
+  free (points);
+
+  return status;
+}
+
+/**
+ * idna_to_ascii_lz:
+ * @input: zero terminated input string in the locale's character set.
+ * @output: pointer to newly allocated output string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert domain name in the locale's encoding to ASCII string.  The
+ * domain name may contain several labels, separated by dots.  The
+ * output buffer must be deallocated by the caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_ascii_lz (const char *input, char **output, int flags)
+{
+  char *as_utf8;
+  int status;
+
+  /* Transcode from the locale's charset, then reuse the UTF-8 path.  */
+  as_utf8 = stringprep_locale_to_utf8 (input);
+  if (as_utf8 == NULL)
+    return IDNA_ICONV_ERROR;
+
+  status = idna_to_ascii_8z (as_utf8, output, flags);
+  free (as_utf8);
+
+  return status;
+}
+
+/**
+ * idna_to_unicode_4z4z:
+ * @input: zero-terminated Unicode string.
+ * @output: pointer to newly allocated output Unicode string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert possibly ACE encoded domain name in UCS-4 format into a
+ * UCS-4 string.  The domain name may contain several labels,
+ * separated by dots.  The output buffer must be deallocated by the
+ * caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
+{
+  const uint32_t *start = input;
+  const uint32_t *end = input;
+  uint32_t *buf, *newp;
+  size_t buflen;
+  uint32_t *out = NULL;
+  size_t outlen = 0;
+
+  *output = NULL;
+  do
+    {
+      for (end = start; *end && !DOTP (*end); end++)
+	;
+      buflen = end - start;
+      buf = malloc (sizeof (buf[0]) * (buflen + 1));
+      if (!buf)
+	goto oom;
+
+      /* don't check the result as per specification! */
+      (void) idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
+
+      if (out)
+	{
+	  newp = realloc (out, sizeof (out[0]) * (outlen + 1 + buflen + 1));
+	  if (!newp)
+	    goto oom;
+	  out = newp;
+	  out[outlen++] = 0x002E;	/* '.' (full stop) */
+	  memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
+	  outlen += buflen;
+	  free (buf);
+	}
+      else
+	{
+	  out = buf;
+	  outlen = buflen;
+	}
+      out[outlen] = 0x0;
+
+      start = end + 1;
+    }
+  while (*end);
+
+  *output = out;
+  return IDNA_SUCCESS;
+
+oom:
+  /* buf is NULL or a buffer not yet merged into out, so both are ours.  */
+  free (buf);
+  free (out);
+  return IDNA_MALLOC_ERROR;
+}
+
+/**
+ * idna_to_unicode_8z4z:
+ * @input: zero-terminated UTF-8 string.
+ * @output: pointer to newly allocated output Unicode string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert possibly ACE encoded domain name in UTF-8 format into a
+ * UCS-4 string.  The domain name may contain several labels,
+ * separated by dots.  The output buffer must be deallocated by the
+ * caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
+{
+  size_t n_points;		/* passed to the decoder; not read here */
+  uint32_t *points = stringprep_utf8_to_ucs4 (input, -1, &n_points);
+  int status;
+
+  if (points == NULL)
+    return IDNA_ICONV_ERROR;
+
+  /* The label-by-label conversion is done on the code point string.  */
+  status = idna_to_unicode_4z4z (points, output, flags);
+  free (points);
+
+  return status;
+}
+
+/**
+ * idna_to_unicode_8z8z:
+ * @input: zero-terminated UTF-8 string.
+ * @output: pointer to newly allocated output UTF-8 string.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert possibly ACE encoded domain name in UTF-8 format into a
+ * UTF-8 string.  The domain name may contain several labels,
+ * separated by dots.  The output buffer must be deallocated by the
+ * caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_unicode_8z8z (const char *input, char **output, int flags)
+{
+  uint32_t *ucs4;
+  int rc;
+
+  /* On failure ucs4 holds no usable string: do not convert or free it.  */
+  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
+  if (rc != IDNA_SUCCESS)
+    return rc;
+
+  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
+  free (ucs4);
+  return *output ? IDNA_SUCCESS : IDNA_ICONV_ERROR;
+}
+
+/**
+ * idna_to_unicode_8zlz:
+ * @input: zero-terminated UTF-8 string.
+ * @output: pointer to newly allocated output string encoded in the
+ *   current locale's character set.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert possibly ACE encoded domain name in UTF-8 format into a
+ * string encoded in the current locale's character set.  The domain
+ * name may contain several labels, separated by dots.  The output
+ * buffer must be deallocated by the caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_unicode_8zlz (const char *input, char **output, int flags)
+{
+  char *utf8;
+  int rc;
+
+  /* Do not hand a failed conversion's pointer to the transcoder.  */
+  rc = idna_to_unicode_8z8z (input, &utf8, flags);
+  if (rc != IDNA_SUCCESS)
+    return rc;
+
+  *output = stringprep_utf8_to_locale (utf8);
+  free (utf8);
+  return *output ? IDNA_SUCCESS : IDNA_ICONV_ERROR;
+}
+
+/**
+ * idna_to_unicode_lzlz:
+ * @input: zero-terminated string encoded in the current locale's
+ *   character set.
+ * @output: pointer to newly allocated output string encoded in the
+ *   current locale's character set.
+ * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
+ *
+ * Convert possibly ACE encoded domain name in the locale's character
+ * set into a string encoded in the current locale's character set.
+ * The domain name may contain several labels, separated by dots.  The
+ * output buffer must be deallocated by the caller.
+ *
+ * Return value: Returns IDNA_SUCCESS on success, or error code.
+ **/
+int
+idna_to_unicode_lzlz (const char *input, char **output, int flags)
+{
+  int status;
+  char *as_utf8 = stringprep_locale_to_utf8 (input);
+
+  if (as_utf8 == NULL)
+    return IDNA_ICONV_ERROR;
+
+  /* The UTF-8 variant already converts its result back to the locale.  */
+  status = idna_to_unicode_8zlz (as_utf8, output, flags);
+  free (as_utf8);
+
+  return status;
+}
+
+/**
+ * IDNA_ACE_PREFIX
+ *
+ * The IANA allocated prefix to use for IDNA. "xn--"
+ */
+
+/**
+ * Idna_rc:
+ * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
+ *   always be zero, the remaining ones are only guaranteed to hold
+ *   non-zero values, for logical comparison purposes.
+ * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
+ * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
+ * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
+ *   the string contains non-LDH ASCII characters.
+ * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
+ *   the string contains a leading or trailing hyphen-minus (U+002D).
+ * @IDNA_INVALID_LENGTH: The final output string is not within the
+ *   (inclusive) range 1 to 63 characters.
+ * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
+ *   (for ToUnicode).
+ * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
+ *   string does not equal the input.
+ * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
+ *   ToASCII).
+ * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
+ * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
+ *   fatal error).
+ *
+ * Enumerated return codes of idna_to_ascii_4i(),
+ * idna_to_unicode_44i() functions (and functions derived from those
+ * functions).  The value 0 is guaranteed to always correspond to
+ * success.
+ */
+
+
+/**
+ * Idna_flags:
+ * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
+ *   Unicode code points.
+ * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
+ *   rules (i.e., normal host name rules).
+ *
+ * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
+ */
diff -Nur t/idna.h libidn/idna.h
--- t/idna.h	1970-01-01 01:00:00.000000000 +0100
+++ libidn/idna.h	2004-03-08 00:09:28.000000000 +0100
@@ -0,0 +1,98 @@
+/* idna.h	Declarations for IDNA.
+ * Copyright (C) 2002, 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _IDNA_H
+#define _IDNA_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>		/* size_t */
+#include <stdint.h>		/* uint32_t */
+
+  /* Error codes. */
+  typedef enum
+  {
+    IDNA_SUCCESS = 0,
+    IDNA_STRINGPREP_ERROR = 1,
+    IDNA_PUNYCODE_ERROR = 2,
+    IDNA_CONTAINS_NON_LDH = 3,
+    /* Workaround typo in earlier versions. */
+    IDNA_CONTAINS_LDH = IDNA_CONTAINS_NON_LDH,
+    IDNA_CONTAINS_MINUS = 4,
+    IDNA_INVALID_LENGTH = 5,
+    IDNA_NO_ACE_PREFIX = 6,
+    IDNA_ROUNDTRIP_VERIFY_ERROR = 7,
+    IDNA_CONTAINS_ACE_PREFIX = 8,
+    IDNA_ICONV_ERROR = 9,
+    /* Internal errors. */
+    IDNA_MALLOC_ERROR = 201,
+    IDNA_DLOPEN_ERROR = 202
+  } Idna_rc;
+
+  /* IDNA flags */
+  typedef enum
+  {
+    IDNA_ALLOW_UNASSIGNED = 0x0001,
+    IDNA_USE_STD3_ASCII_RULES = 0x0002
+  } Idna_flags;
+
+#ifndef IDNA_ACE_PREFIX
+#define IDNA_ACE_PREFIX "xn--"
+#endif
+
+  /* Core functions */
+  extern int idna_to_ascii_4i (const uint32_t * in, size_t inlen,
+			       char *out, int flags);
+  extern int idna_to_unicode_44i (const uint32_t * in, size_t inlen,
+				  uint32_t * out, size_t * outlen, int flags);
+
+  /* Wrappers that handle several labels */
+
+  extern int idna_to_ascii_4z (const uint32_t * input,
+			       char **output, int flags);
+
+  extern int idna_to_ascii_8z (const char *input, char **output, int flags);
+
+  extern int idna_to_ascii_lz (const char *input, char **output, int flags);
+
+
+  extern int idna_to_unicode_4z4z (const uint32_t * input,
+				   uint32_t ** output, int flags);
+
+  extern int idna_to_unicode_8z4z (const char *input,
+				   uint32_t ** output, int flags);
+
+  extern int idna_to_unicode_8z8z (const char *input,
+				   char **output, int flags);
+
+  extern int idna_to_unicode_8zlz (const char *input,
+				   char **output, int flags);
+
+  extern int idna_to_unicode_lzlz (const char *input,
+				   char **output, int flags);
+
+#ifdef __cplusplus
+}
+#endif
+#endif				/* _IDNA_H */
diff -Nur t/idn-stub.c libidn/idn-stub.c
--- t/idn-stub.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/idn-stub.c	2004-03-08 00:10:41.000000000 +0100
@@ -0,0 +1,49 @@
+/* idn-stub.c --- Stub to dlopen libcidn.so and invoke idna_to_ascii_lz.
+ * Copyright (C) 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <dlfcn.h>
+
+/* Get specification for idna_to_ascii_lz. */
+#include "idna.h"
+
+/* Stub to dlopen libcidn.so and invoke the real idna_to_ascii_lz.
+   Returns that function's result, or IDNA_DLOPEN_ERROR if the library
+   cannot be loaded or does not provide the symbol.  */
+int
+idna_to_ascii_lz (const char *input, char **output, int flags)
+{
+  void *h;
+  int (*to_ascii_lz) (const char *input, char **output, int flags);
+  int rc = IDNA_DLOPEN_ERROR;
+
+  h = __libc_dlopen ("libcidn.so");
+  if (!h)
+    return IDNA_DLOPEN_ERROR;
+
+  /* A failed lookup yields NULL; calling through it would crash.  */
+  to_ascii_lz = __libc_dlsym (h, "idna_to_ascii_lz");
+  if (to_ascii_lz)
+    rc = to_ascii_lz (input, output, flags);
+
+  __libc_dlclose (h);
+  return rc;
+}
diff -Nur t/Makefile libidn/Makefile
--- t/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ libidn/Makefile	2004-03-07 23:29:32.000000000 +0100
@@ -0,0 +1,34 @@
+# Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+
+# Makefile for libidn subdirectory of GNU C Library.
+
+subdir	:= libidn
+
+distribute := punycode.h stringprep.h idna.h
+# Only the dlopen stub is listed under routines.
+routines = idn-stub
+# The IDN implementation itself is built as the extra library libcidn.
+extra-libs		= libcidn
+extra-libs-others	= $(extra-libs)
+# Source modules that make up libcidn.
+libcidn-routines := punycode toutf8 nfkc stringprep rfc3454 profiles idna
+
+include ../Rules
+# libcidn.so has libc.so and libc_nonshared.a as prerequisites.
+$(objpfx)libcidn.so: $(common-objpfx)libc.so $(common-objpfx)libc_nonshared.a
diff -Nur t/nfkc.c libidn/nfkc.c
--- t/nfkc.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/nfkc.c	2003-11-26 21:42:54.000000000 +0100
@@ -0,0 +1,1058 @@
+/* nfkc.c	Unicode normalization utilities.
+ * Copyright (C) 2002, 2003  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "stringprep.h"
+
+/* This file contains functions from GLIB, including gutf8.c and
+ * gunidecomp.c, all licensed under LGPL and copyright hold by:
+ *
+ *  Copyright (C) 1999, 2000 Tom Tromey
+ *  Copyright 2000 Red Hat, Inc.
+ */
+
+/* Hacks to make syncing with GLIB code easier. */
+#define gboolean int
+#define gchar char
+#define guchar unsigned char
+#define glong long
+#define gint int
+#define guint unsigned int
+#define gushort unsigned short
+#define gint16 int16_t
+#define guint16 uint16_t
+#define gunichar uint32_t
+#define gsize size_t
+#define gssize ssize_t
+#define g_malloc malloc
+#define g_free free
+#define GError void
+#define g_set_error(a,b,c,d) 0
+#define g_new(struct_type, n_structs)					\
+  ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))
+#  if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus)
+#    define G_STMT_START	(void)(
+#    define G_STMT_END		)
+#  else
+#    if (defined (sun) || defined (__sun__))
+#      define G_STMT_START	if (1)
+#      define G_STMT_END	else (void)0
+#    else
+#      define G_STMT_START	do
+#      define G_STMT_END	while (0)
+#    endif
+#  endif
+#define g_return_val_if_fail(expr,val)		G_STMT_START{ (void)0; }G_STMT_END
+#define G_N_ELEMENTS(arr)		(sizeof (arr) / sizeof ((arr)[0]))
+#define TRUE 1
+#define FALSE 0
+
+/* Code from GLIB gunicode.h starts here. */
+
+typedef enum
+{
+  G_NORMALIZE_DEFAULT,		/* canonical decomposition only */
+  G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
+  G_NORMALIZE_DEFAULT_COMPOSE,	/* canonical decomposition, then composition */
+  G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
+  G_NORMALIZE_ALL,		/* compatibility decomposition only */
+  G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
+  G_NORMALIZE_ALL_COMPOSE,	/* compatibility decomposition, then composition */
+  G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
+}
+GNormalizeMode;
+
+/* Code from GLIB gutf8.c starts here. */
+
+#define UTF8_COMPUTE(Char, Mask, Len)		\
+  if (Char < 128)				\
+    {						\
+      Len = 1;					\
+      Mask = 0x7f;				\
+    }						\
+  else if ((Char & 0xe0) == 0xc0)		\
+    {						\
+      Len = 2;					\
+      Mask = 0x1f;				\
+    }						\
+  else if ((Char & 0xf0) == 0xe0)		\
+    {						\
+      Len = 3;					\
+      Mask = 0x0f;				\
+    }						\
+  else if ((Char & 0xf8) == 0xf0)		\
+    {						\
+      Len = 4;					\
+      Mask = 0x07;				\
+    }						\
+  else if ((Char & 0xfc) == 0xf8)		\
+    {						\
+      Len = 5;					\
+      Mask = 0x03;				\
+    }						\
+  else if ((Char & 0xfe) == 0xfc)		\
+    {						\
+      Len = 6;					\
+      Mask = 0x01;				\
+    }						\
+  else						\
+    Len = -1;
+
+#define UTF8_LENGTH(Char)			\
+  ((Char) < 0x80 ? 1 :				\
+   ((Char) < 0x800 ? 2 :			\
+    ((Char) < 0x10000 ? 3 :			\
+     ((Char) < 0x200000 ? 4 :			\
+      ((Char) < 0x4000000 ? 5 : 6)))))
+
+
+#define UTF8_GET(Result, Chars, Count, Mask, Len)	\
+  (Result) = (Chars)[0] & (Mask);			\
+  for ((Count) = 1; (Count) < (Len); ++(Count))		\
+    {							\
+      if (((Chars)[(Count)] & 0xc0) != 0x80)		\
+	{						\
+	  (Result) = -1;				\
+	  break;					\
+	}						\
+      (Result) <<= 6;					\
+      (Result) |= ((Chars)[(Count)] & 0x3f);		\
+    }
+
+#define UNICODE_VALID(Char)			\
+  ((Char) < 0x110000 &&				\
+   (((Char) & 0xFFFFF800) != 0xD800) &&		\
+   ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&	\
+   ((Char) & 0xFFFE) != 0xFFFE)
+
+
+static const gchar utf8_skip_data[256] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
+  5, 5, 5, 6, 6, 1, 1
+};
+
+const gchar *const g_utf8_skip = utf8_skip_data;
+
+#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])
+
+/*
+ * g_utf8_strlen:
+ * @p: pointer to the start of a UTF-8 encoded string.
+ * @max: the maximum number of bytes to examine. If @max
+ *       is less than 0, then the string is assumed to be
+ *       nul-terminated. If @max is 0, @p will not be examined and
+ *       may be %NULL.
+ *
+ * Returns the length of the string in characters.
+ *
+ * Return value: the length of the string in characters
+ **/
+static glong
+g_utf8_strlen (const gchar * p, gssize max)
+{
+  glong len = 0;
+  const gchar *start = p;
+  g_return_val_if_fail (p != NULL || max == 0, 0);
+
+  if (max < 0)
+    {
+      while (*p)
+	{
+	  p = g_utf8_next_char (p);
+	  ++len;
+	}
+    }
+  else
+    {
+      if (max == 0 || !*p)
+	return 0;
+      /* Step past each character first and count it one step late, so
+	 the final character is only counted once it is known to fit.  */
+      p = g_utf8_next_char (p);
+      while (p - start < max && *p)
+	{
+	  ++len;
+	  p = g_utf8_next_char (p);
+	}
+
+      /* Count the last character unless it extends beyond @max.  It
+       * must also be counted when the string ends before @max bytes.
+       */
+      if (p - start <= max)
+	++len;
+    }
+
+  return len;
+}
+
+/*
+ * g_utf8_get_char:
+ * @p: a pointer to Unicode character encoded as UTF-8
+ *
+ * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
+ * If @p does not point to a valid UTF-8 encoded character, results are
+ * undefined. If you are not sure that the bytes are complete
+ * valid Unicode characters, you should use g_utf8_get_char_validated()
+ * instead.
+ *
+ * Return value: the resulting character
+ **/
+static gunichar
+g_utf8_get_char (const gchar * p)
+{
+  int i, mask = 0, len;
+  gunichar result;
+  unsigned char c = (unsigned char) *p;
+
+  UTF8_COMPUTE (c, mask, len);	/* sequence length and payload mask from the lead byte */
+  if (len == -1)		/* C is not a valid lead byte */
+    return (gunichar) - 1;
+  UTF8_GET (result, p, i, mask, len);	/* sets result to -1 on a bad continuation byte */
+
+  return result;
+}
+
+/*
+ * g_unichar_to_utf8:
+ * @c: a ISO10646 character code
+ * @outbuf: output buffer, must have at least 6 bytes of space.
+ *       If %NULL, the length will be computed and returned
+ *       and nothing will be written to @outbuf.
+ *
+ * Converts a single character to UTF-8.
+ *
+ * Return value: number of bytes written
+ **/
+static int
+g_unichar_to_utf8 (gunichar c, gchar * outbuf)
+{
+  guint len = 0;
+  int first;			/* marker bits OR-ed into the lead byte */
+  int i;
+  /* Pick the encoded length and the lead-byte marker from C's magnitude.  */
+  if (c < 0x80)
+    {
+      first = 0;
+      len = 1;
+    }
+  else if (c < 0x800)
+    {
+      first = 0xc0;
+      len = 2;
+    }
+  else if (c < 0x10000)
+    {
+      first = 0xe0;
+      len = 3;
+    }
+  else if (c < 0x200000)
+    {
+      first = 0xf0;
+      len = 4;
+    }
+  else if (c < 0x4000000)
+    {
+      first = 0xf8;
+      len = 5;
+    }
+  else
+    {
+      first = 0xfc;
+      len = 6;
+    }
+  /* With a NULL outbuf only the length is reported.  */
+  if (outbuf)
+    {
+      for (i = len - 1; i > 0; --i)	/* continuation bytes, last one first */
+	{
+	  outbuf[i] = (c & 0x3f) | 0x80;
+	  c >>= 6;
+	}
+      outbuf[0] = c | first;	/* remaining high bits plus the marker; no NUL is appended */
+    }
+
+  return len;
+}
+
+/*
+ * g_utf8_to_ucs4_fast:
+ * @str: a UTF-8 encoded string
+ * @len: the maximum length of @str to use. If @len < 0, then
+ *       the string is nul-terminated.
+ * @items_written: location to store the number of characters in the
+ *                 result, or %NULL.
+ *
+ * Convert a string from UTF-8 to a 32-bit fixed width
+ * representation as UCS-4, assuming valid UTF-8 input.
+ * This function is roughly twice as fast as g_utf8_to_ucs4()
+ * but does no error checking on the input.
+ *
+ * Return value: a pointer to a newly allocated UCS-4 string.
+ *               This value must be freed with g_free().
+ **/
+static gunichar *
+g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
+{
+  gint j, charlen;
+  gunichar *result;
+  gint n_chars, i;
+  const gchar *p;
+
+  g_return_val_if_fail (str != NULL, NULL);	/* expands to a no-op in this file */
+  /* Pass 1: count characters by hopping from lead byte to lead byte.  */
+  p = str;
+  n_chars = 0;
+  if (len < 0)
+    {
+      while (*p)
+	{
+	  p = g_utf8_next_char (p);
+	  ++n_chars;
+	}
+    }
+  else
+    {
+      while (p < str + len && *p)
+	{
+	  p = g_utf8_next_char (p);
+	  ++n_chars;
+	}
+    }
+
+  result = g_new (gunichar, n_chars + 1);	/* one extra element for the terminating 0 */
+  if (!result)
+    return NULL;
+  /* Pass 2: decode; continuation bytes are masked in without validation.  */
+  p = str;
+  for (i = 0; i < n_chars; i++)
+    {
+      gunichar wc = ((unsigned char *) p)[0];
+
+      if (wc < 0x80)
+	{
+	  result[i] = wc;
+	  p++;
+	}
+      else
+	{
+	  if (wc < 0xe0)
+	    {
+	      charlen = 2;
+	      wc &= 0x1f;
+	    }
+	  else if (wc < 0xf0)
+	    {
+	      charlen = 3;
+	      wc &= 0x0f;
+	    }
+	  else if (wc < 0xf8)
+	    {
+	      charlen = 4;
+	      wc &= 0x07;
+	    }
+	  else if (wc < 0xfc)
+	    {
+	      charlen = 5;
+	      wc &= 0x03;
+	    }
+	  else
+	    {
+	      charlen = 6;
+	      wc &= 0x01;
+	    }
+	  /* Append six payload bits from each following byte.  */
+	  for (j = 1; j < charlen; j++)
+	    {
+	      wc <<= 6;
+	      wc |= ((unsigned char *) p)[j] & 0x3f;
+	    }
+
+	  result[i] = wc;
+	  p += charlen;
+	}
+    }
+  result[i] = 0;
+
+  if (items_written)
+    *items_written = i;		/* character count, excluding the terminating 0 */
+
+  return result;
+}
+
+/*
+ * g_ucs4_to_utf8:
+ * @str: a UCS-4 encoded string
+ * @len: the maximum length of @str to use. If @len < 0, then
+ *       the string is terminated with a 0 character.
+ * @items_read: location to store number of characters read read, or %NULL.
+ * @items_written: location to store number of bytes written or %NULL.
+ *                 The value here stored does not include the trailing 0
+ *                 byte.
+ * @error: location to store the error occuring, or %NULL to ignore
+ *         errors. Any of the errors in #GConvertError other than
+ *         %G_CONVERT_ERROR_NO_CONVERSION may occur.
+ *
+ * Convert a string from a 32-bit fixed width representation as UCS-4.
+ * to UTF-8. The result will be terminated with a 0 byte.
+ *
+ * Return value: a pointer to a newly allocated UTF-8 string.
+ *               This value must be freed with g_free(). If an
+ *               error occurs, %NULL will be returned and
+ *               @error set.
+ **/
+static gchar *
+g_ucs4_to_utf8 (const gunichar * str,
+		glong len,
+		glong * items_read, glong * items_written, GError ** error)
+{
+  gint result_length;
+  gchar *result = NULL;
+  gchar *p;
+  gint i;
+  /* Pass 1: add up the UTF-8 length, rejecting values >= 0x80000000.  */
+  result_length = 0;
+  for (i = 0; len < 0 || i < len; i++)
+    {
+      if (!str[i])
+	break;
+
+      if (str[i] >= 0x80000000)
+	{
+	  if (items_read)
+	    *items_read = i;
+	  /* g_set_error is defined as 0 in this file: @error is never written.  */
+	  g_set_error (error, G_CONVERT_ERROR,
+		       G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+		       _("Character out of range for UTF-8"));
+	  goto err_out;
+	}
+
+      result_length += UTF8_LENGTH (str[i]);
+    }
+
+  result = g_malloc (result_length + 1);	/* + 1 for the terminating 0 byte */
+  if (!result)
+    return NULL;
+  p = result;
+  /* Pass 2: encode from the start until result_length bytes are written.  */
+  i = 0;
+  while (p < result + result_length)
+    p += g_unichar_to_utf8 (str[i++], p);
+
+  *p = '\0';
+
+  if (items_written)
+    *items_written = p - result;
+  /* Reached on success and on the range error alike; result is NULL on error.  */
+err_out:
+  if (items_read)
+    *items_read = i;
+
+  return result;
+}
+
+/* Code from GLIB gunidecomp.c starts here. */
+
+#include "gunidecomp.h"
+#include "gunicomp.h"
+
+#define CC_PART1(Page, Char) \
+  ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
+   ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
+   : (cclass_data[combining_class_table_part1[Page]][Char]))
+
+#define CC_PART2(Page, Char) \
+  ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
+   ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
+   : (cclass_data[combining_class_table_part2[Page]][Char]))
+
+#define COMBINING_CLASS(Char) \
+  (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
+   ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
+   : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
+      ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
+      : 0))
+
+/* constants for hangul syllable [de]composition */
+#define SBase 0xAC00
+#define LBase 0x1100
+#define VBase 0x1161
+#define TBase 0x11A7
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define NCount (VCount * TCount)
+#define SCount (LCount * NCount)
+
+/*
+ * g_unicode_canonical_ordering:
+ * @string: a UCS-4 encoded string.
+ * @len: the maximum length of @string to use.
+ *
+ * Computes the canonical ordering of a string in-place. 
+ * This rearranges decomposed characters in the string
+ * according to their combining classes.  See the Unicode
+ * manual for more information.
+ **/
+static void
+g_unicode_canonical_ordering (gunichar * string, gsize len)
+{
+  gsize i;
+  int swap = 1;
+
+  if (len < 2)			/* nothing to order; keeps the unsigned len - 1 from wrapping */
+    return;
+  while (swap)
+    {
+      int last = COMBINING_CLASS (string[0]);
+      swap = 0;
+      for (i = 0; i < len - 1; ++i)
+	{
+	  int next = COMBINING_CLASS (string[i + 1]);
+	  if (next != 0 && last > next)
+	    {
+	      gsize j;
+	      /* Percolate item leftward through string.  */
+	      for (j = i + 1; j > 0; --j)
+		{
+		  gunichar t;
+		  if (COMBINING_CLASS (string[j - 1]) <= next)
+		    break;
+		  t = string[j];
+		  string[j] = string[j - 1];
+		  string[j - 1] = t;
+		  swap = 1;
+		}
+	      /* Re-enter the loop looking at the old character again.  */
+	      next = last;
+	    }
+	  last = next;
+	}
+    }
+}
+
+/* http://www.unicode.org/unicode/reports/tr15/#Hangul
+ * r should be null or have sufficient space. Calling with r == NULL will
+ * only calculate the result_len; however, a buffer with space for three
+ * characters will always be big enough. */
+static void
+decompose_hangul (gunichar s, gunichar * r, gsize * result_len)
+{
+  gint SIndex = s - SBase;	/* offset of S from the first syllable, SBase */
+
+  /* not a hangul syllable */
+  if (SIndex < 0 || SIndex >= SCount)
+    {
+      if (r)
+	r[0] = s;		/* passed through unchanged */
+      *result_len = 1;
+    }
+  else
+    {
+      gunichar L = LBase + SIndex / NCount;
+      gunichar V = VBase + (SIndex % NCount) / TCount;
+      gunichar T = TBase + SIndex % TCount;
+
+      if (r)
+	{
+	  r[0] = L;
+	  r[1] = V;
+	}
+      /* T == TBase: SIndex is a multiple of TCount, so only L and V are produced.  */
+      if (T != TBase)
+	{
+	  if (r)
+	    r[2] = T;
+	  *result_len = 3;
+	}
+      else
+	*result_len = 2;
+    }
+}
+
+/* returns a pointer to a null-terminated UTF-8 string, or NULL if none */
+static const gchar *
+find_decomposition (gunichar ch, gboolean compat)
+{
+  int start = 0;
+  int end = G_N_ELEMENTS (decomp_table);
+  /* Binary search; assumes decomp_table is sorted by ch -- TODO confirm.  */
+  if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
+    {
+      while (TRUE)
+	{
+	  int half = (start + end) / 2;
+	  if (ch == decomp_table[half].ch)
+	    {
+	      int offset;
+	      /* Compat lookups fall back to the canonical entry.  */
+	      if (compat)
+		{
+		  offset = decomp_table[half].compat_offset;
+		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
+		    offset = decomp_table[half].canon_offset;
+		}
+	      else
+		{
+		  offset = decomp_table[half].canon_offset;
+		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
+		    return NULL;
+		}
+
+	      return &(decomp_expansion_string[offset]);
+	    }
+	  else if (half == start)	/* interval cannot shrink further: not found */
+	    break;
+	  else if (ch > decomp_table[half].ch)
+	    start = half;
+	  else
+	    end = half;
+	}
+    }
+
+  return NULL;
+}
+
+/* L,V => LV and LV,T => LVT; TRUE and *RESULT set if A,B combine.  */
+static gboolean
+combine_hangul (gunichar a, gunichar b, gunichar * result)
+{
+  gint LIndex = a - LBase;
+  gint SIndex = a - SBase;
+  gint VIndex = b - VBase;
+  gint TIndex = b - TBase;
+
+  if (0 <= LIndex && LIndex < LCount && 0 <= VIndex && VIndex < VCount)
+    {
+      *result = SBase + (LIndex * VCount + VIndex) * TCount;
+      return TRUE;
+    }
+  /* TBase is one below the first trailing consonant, so TIndex 0 is not
+     a T, and TIndex == TCount would spill into the next LV syllable.  */
+  else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
+	   && 0 < TIndex && TIndex < TCount)
+    {
+      *result = a + TIndex;
+      return TRUE;
+    }
+  return FALSE;
+}
+
+#define CI(Page, Char) \
+  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
+   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
+   : (compose_data[compose_table[Page]][Char]))
+
+#define COMPOSE_INDEX(Char) \
+     ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
+
+static gboolean
+combine (gunichar a, gunichar b, gunichar * result)
+{
+  gushort index_a, index_b;
+  /* Hangul pairs are composed arithmetically before any table lookup.  */
+  if (combine_hangul (a, b, result))
+    return TRUE;
+
+  index_a = COMPOSE_INDEX (a);
+  /* "First single" range: compose_first_single holds the one partner [0] and the result [1].  */
+  if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
+    {
+      if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
+	{
+	  *result =
+	    compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
+	  return TRUE;
+	}
+      else
+	return FALSE;
+    }
+
+  index_b = COMPOSE_INDEX (b);
+  /* "Second single" range: the same scheme keyed on B, with A as the partner.  */
+  if (index_b >= COMPOSE_SECOND_SINGLE_START)
+    {
+      if (a ==
+	  compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
+	{
+	  *result =
+	    compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
+	  return TRUE;
+	}
+      else
+	return FALSE;
+    }
+  /* Otherwise look the pair up in compose_array; a 0 entry means no composition.  */
+  if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START
+      && index_b >= COMPOSE_SECOND_START
+      && index_b < COMPOSE_SECOND_SINGLE_START)
+    {
+      gunichar res =
+	compose_array[index_a - COMPOSE_FIRST_START][index_b -
+						     COMPOSE_SECOND_START];
+
+      if (res)
+	{
+	  *result = res;
+	  return TRUE;
+	}
+    }
+
+  return FALSE;
+}
+/* Normalize UTF-8 STR into a new 0-terminated UCS-4 array; NULL if allocation fails.  */
+static gunichar *
+_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
+{
+  gsize n_wc;
+  gunichar *wc_buffer;
+  const char *p;
+  gsize last_start;
+  gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
+  gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);
+  /* Pass 1: count the code points of the fully decomposed string.  */
+  n_wc = 0;
+  p = str;
+  while ((max_len < 0 || p < str + max_len) && *p)
+    {
+      const gchar *decomp;
+      gunichar wc = g_utf8_get_char (p);
+
+      if (wc >= 0xac00 && wc <= 0xd7af)
+	{
+	  gsize result_len;
+	  decompose_hangul (wc, NULL, &result_len);
+	  n_wc += result_len;
+	}
+      else
+	{
+	  decomp = find_decomposition (wc, do_compat);
+
+	  if (decomp)
+	    n_wc += g_utf8_strlen (decomp, -1);
+	  else
+	    n_wc++;
+	}
+
+      p = g_utf8_next_char (p);
+    }
+
+  wc_buffer = g_new (gunichar, n_wc + 1);	/* + 1 for the terminating 0 */
+  if (!wc_buffer)
+    return NULL;
+  /* Pass 2: decompose into wc_buffer, ordering each run as it is closed.  */
+  last_start = 0;
+  n_wc = 0;
+  p = str;
+  while ((max_len < 0 || p < str + max_len) && *p)
+    {
+      gunichar wc = g_utf8_get_char (p);
+      const gchar *decomp;
+      int cc;
+      gsize old_n_wc = n_wc;
+
+      if (wc >= 0xac00 && wc <= 0xd7af)
+	{
+	  gsize result_len;
+	  decompose_hangul (wc, wc_buffer + n_wc, &result_len);
+	  n_wc += result_len;
+	}
+      else
+	{
+	  decomp = find_decomposition (wc, do_compat);
+
+	  if (decomp)
+	    {
+	      const char *pd;
+	      for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
+		wc_buffer[n_wc++] = g_utf8_get_char (pd);
+	    }
+	  else
+	    wc_buffer[n_wc++] = wc;
+	}
+      /* If the first code point just added has class 0, order everything since last_start.  */
+      if (n_wc > 0)
+	{
+	  cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
+
+	  if (cc == 0)
+	    {
+	      g_unicode_canonical_ordering (wc_buffer + last_start,
+					    n_wc - last_start);
+	      last_start = old_n_wc;
+	    }
+	}
+
+      p = g_utf8_next_char (p);
+    }
+  /* Order the final run, which no later class-0 character closed.  */
+  if (n_wc > 0)
+    {
+      g_unicode_canonical_ordering (wc_buffer + last_start,
+				    n_wc - last_start);
+      last_start = n_wc;
+    }
+
+  wc_buffer[n_wc] = 0;
+
+  /* All decomposed and reordered */
+
+  if (do_compose && n_wc > 0)
+    {
+      gsize i, j;
+      int last_cc = 0;
+      last_start = 0;
+      /* Try each element against wc_buffer[last_start], the last class-0 element.  */
+      for (i = 0; i < n_wc; i++)
+	{
+	  int cc = COMBINING_CLASS (wc_buffer[i]);
+
+	  if (i > 0 &&
+	      (last_cc == 0 || last_cc != cc) &&
+	      combine (wc_buffer[last_start], wc_buffer[i],
+		       &wc_buffer[last_start]))
+	    {
+	      for (j = i + 1; j < n_wc; j++)
+		wc_buffer[j - 1] = wc_buffer[j];
+	      n_wc--;
+	      i--;
+	      /* Element i was removed; recompute the class that precedes the next one.  */
+	      if (i == last_start)
+		last_cc = 0;
+	      else
+		last_cc = COMBINING_CLASS (wc_buffer[i - 1]);
+
+	      continue;
+	    }
+
+	  if (cc == 0)
+	    last_start = i;
+
+	  last_cc = cc;
+	}
+    }
+
+  wc_buffer[n_wc] = 0;
+
+  return wc_buffer;
+}
+
+/*
+ * g_utf8_normalize:
+ * @str: a UTF-8 encoded string.
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
+ * @mode: the type of normalization to perform.
+ *
+ * Converts a string into canonical form, standardizing
+ * such issues as whether a character with an accent
+ * is represented as a base character and combining
+ * accent or as a single precomposed character. You
+ * should generally call g_utf8_normalize() before
+ * comparing two Unicode strings.
+ *
+ * The normalization mode %G_NORMALIZE_DEFAULT only
+ * standardizes differences that do not affect the
+ * text content, such as the above-mentioned accent
+ * representation. %G_NORMALIZE_ALL also standardizes
+ * the "compatibility" characters in Unicode, such
+ * as SUPERSCRIPT THREE to the standard forms
+ * (in this case DIGIT THREE). Formatting information
+ * may be lost but for most text operations such
+ * characters should be considered the same.
+ * For example, g_utf8_collate() normalizes
+ * with %G_NORMALIZE_ALL as its first step.
+ *
+ * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
+ * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
+ * but returned a result with composed forms rather
+ * than a maximally decomposed form. This is often
+ * useful if you intend to convert the string to
+ * a legacy encoding or pass it to a system with
+ * less capable Unicode handling.
+ *
+ * Return value: a newly allocated string, that is the
+ *   normalized form of @str.
+ **/
+static gchar *
+g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode)
+{
+  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
+  gchar *result = NULL;
+
+  if (result_wc)		/* NULL when the buffer could not be allocated */
+    result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
+  g_free (result_wc);
+  return result;
+}
+
+/* Public Libidn API starts here. */
+
+/**
+ * stringprep_utf8_to_unichar:
+ * @p: a pointer to Unicode character encoded as UTF-8
+ *
+ * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
+ * If @p does not point to a valid UTF-8 encoded character, results are
+ * undefined.
+ *
+ * Return value: the resulting character.
+ **/
+uint32_t
+stringprep_utf8_to_unichar (const char *p)
+{
+  return g_utf8_get_char (p);	/* public name for the file-local decoder */
+}
+
+/**
+ * stringprep_unichar_to_utf8:
+ * @c: a ISO10646 character code
+ * @outbuf: output buffer, must have at least 6 bytes of space.
+ *       If %NULL, the length will be computed and returned
+ *       and nothing will be written to @outbuf.
+ *
+ * Converts a single character to UTF-8.
+ *
+ * Return value: number of bytes written.
+ **/
+int
+stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
+{
+  return g_unichar_to_utf8 (c, outbuf);	/* writes 1 to 6 bytes and no terminating NUL */
+}
+
+/**
+ * stringprep_utf8_to_ucs4:
+ * @str: a UTF-8 encoded string
+ * @len: the maximum length of @str to use. If @len < 0, then
+ *       the string is nul-terminated.
+ * @items_written: location to store the number of characters in the
+ *                 result, or %NULL.
+ *
+ * Convert a string from UTF-8 to a 32-bit fixed width
+ * representation as UCS-4, assuming valid UTF-8 input.
+ * This function does no error checking on the input.
+ *
+ * Return value: a pointer to a newly allocated UCS-4 string, or %NULL
+ *               if allocation fails.  Must be freed with free().
+ **/
+uint32_t *
+stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written)
+{
+  return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written);	/* NOTE(review): size_t * cast to long *; assumes equal width */
+}
+
+/**
+ * stringprep_ucs4_to_utf8:
+ * @str: a UCS-4 encoded string
+ * @len: the maximum length of @str to use. If @len < 0, then
+ *       the string is terminated with a 0 character.
+ * @items_read: location to store number of characters read, or %NULL.
+ * @items_written: location to store number of bytes written or %NULL.
+ *                 The value here stored does not include the trailing 0
+ *                 byte.
+ *
+ * Convert a string from a 32-bit fixed width representation as UCS-4.
+ * to UTF-8. The result will be terminated with a 0 byte.
+ *
+ * Return value: a pointer to a newly allocated UTF-8 string.
+ *               This value must be freed with free(). If a value
+ *               in @str is 0x80000000 or larger, or allocation
+ *               fails, %NULL is returned.
+ **/
+char *
+stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len,
+			 size_t * items_read, size_t * items_written)
+{
+  return g_ucs4_to_utf8 (str, len, (glong *) items_read,
+			 (glong *) items_written, NULL);
+}
+
+/**
+ * stringprep_utf8_nfkc_normalize:
+ * @str: a UTF-8 encoded string.
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
+ *
+ * Converts a string into canonical form, standardizing
+ * such issues as whether a character with an accent
+ * is represented as a base character and combining
+ * accent or as a single precomposed character.
+ *
+ * The normalization mode is NFKC (ALL COMPOSE).  It standardizes
+ * differences that do not affect the text content, such as the
+ * above-mentioned accent representation. It standardizes the
+ * "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to
+ * the standard forms (in this case DIGIT THREE). Formatting
+ * information may be lost but for most text operations such
+ * characters should be considered the same. It returns a result with
+ * composed forms rather than a maximally decomposed form.
+ *
+ * Return value: a newly allocated string, that is the
+ *   NFKC normalized form of @str.
+ **/
+char *
+stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)
+{
+  return g_utf8_normalize (str, len, G_NORMALIZE_NFKC);	/* compatibility decomposition, then composition */
+}
+
+/**
+ * stringprep_ucs4_nfkc_normalize:
+ * @str: a Unicode string.
+ * @len: length of @str array, or -1 if @str is nul-terminated.
+ *
+ * Converts the UCS-4 string into UTF-8 and NFKC-normalizes it.
+ *
+ * Return value: a newly allocated Unicode string, that is the NFKC
+ *   normalized form of @str, or %NULL if the UTF-8 conversion or a
+ *   memory allocation fails.
+ **/
+uint32_t *
+stringprep_ucs4_nfkc_normalize (uint32_t * str, ssize_t len)
+{
+  uint32_t *result_wc;
+  char *p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
+
+  if (!p)			/* conversion or allocation failed */
+    return NULL;
+  result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
+  free (p);
+  return result_wc;
+}
diff -Nur t/profiles.c libidn/profiles.c
--- t/profiles.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/profiles.c	2004-03-07 22:34:41.000000000 +0100
@@ -0,0 +1,310 @@
+/* profiles.c	Definitions of stringprep profiles.
+ * Copyright (C) 2002, 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "stringprep.h"
+
+const Stringprep_profiles stringprep_profiles[] = {	/* Name -> profile.  */
+  {"Nameprep", stringprep_nameprep},
+  {"KRBprep", stringprep_kerberos5},	/* Deprecate? */
+  {"Nodeprep", stringprep_xmpp_nodeprep},
+  {"Resourceprep", stringprep_xmpp_resourceprep},
+  {"plain", stringprep_plain},	/* sasl-anon-00. */
+  {"trace", stringprep_trace},	/* sasl-anon-01,02. */
+  {"SASLprep", stringprep_saslprep},
+  {"ISCSIprep", stringprep_iscsi},	/* Obsolete name, same table as iSCSI. */
+  {"iSCSI", stringprep_iscsi},	/* IANA. */
+  {NULL, NULL}			/* Last entry; presumably the end marker.  */
+};
+
+const Stringprep_profile stringprep_nameprep[] = {	/* "Nameprep" */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI,
+   stringprep_rfc3454_C_8},	/* NOTE(review): flag presumably skips this under NO_BIDI */
+  {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_kerberos5[] = {	/* "KRBprep" */
+  /* XXX These steps are likely to be wrong, because the
+     specification they follow is only a rough draft. */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_3},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI,
+   stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_table_element stringprep_xmpp_nodeprep_prohibit[] = {	/* Used by stringprep_xmpp_nodeprep.  */
+  {0x000022},			/* #x22 (") */
+  {0x000026},			/* #x26 (&) */
+  {0x000027},			/* #x27 (') */
+  {0x00002F},			/* #x2F (/) */
+  {0x00003A},			/* #x3A (:) */
+  {0x00003C},			/* #x3C (<) */
+  {0x00003E},			/* #x3E (>) */
+  {0x000040},			/* #x40 (@) */
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_xmpp_nodeprep[] = {	/* "Nodeprep" */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_xmpp_nodeprep_prohibit},	/* profile-specific additions */
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_xmpp_resourceprep[] = {	/* "Resourceprep" */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_plain[] = {	/* "plain" (sasl-anon-00); no map or NFKC step */
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_trace[] = {	/* "trace" (sasl-anon-01,02); same steps as stringprep_plain */
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_table_element stringprep_iscsi_prohibit[] = {	/* Used by stringprep_iscsi.  */
+  {0x0000},			/* [ASCII CONTROL CHARACTERS and SPACE through ,] */
+  {0x0001},
+  {0x0002},
+  {0x0003},
+  {0x0004},
+  {0x0005},
+  {0x0006},
+  {0x0007},
+  {0x0008},
+  {0x0009},
+  {0x000A},
+  {0x000B},
+  {0x000C},
+  {0x000D},
+  {0x000E},
+  {0x000F},
+  {0x0010},
+  {0x0011},
+  {0x0012},
+  {0x0013},
+  {0x0014},
+  {0x0015},
+  {0x0016},
+  {0x0017},
+  {0x0018},
+  {0x0019},
+  {0x001A},
+  {0x001B},
+  {0x001C},
+  {0x001D},
+  {0x001E},
+  {0x001F},
+  {0x0020},
+  {0x0021},
+  {0x0022},
+  {0x0023},
+  {0x0024},
+  {0x0025},
+  {0x0026},
+  {0x0027},
+  {0x0028},
+  {0x0029},
+  {0x002A},
+  {0x002B},
+  {0x002C},			/* 0x2D (-) and 0x2E (.) are not listed */
+  {0x002F},			/* [ASCII /] */
+  {0x003B},			/* [ASCII ; through @]; 0x3A (:) is not listed */
+  {0x003C},
+  {0x003D},
+  {0x003E},
+  {0x003F},
+  {0x0040},
+  {0x005B},			/* [ASCII [ through `] */
+  {0x005C},
+  {0x005D},
+  {0x005E},
+  {0x005F},
+  {0x0060},
+  {0x007B},			/* [ASCII { through DEL] */
+  {0x007C},
+  {0x007D},
+  {0x007E},
+  {0x007F},
+  {0x3002},			/* ideographic full stop */
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_iscsi[] = {	/* "iSCSI" and "ISCSIprep" */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_iscsi_prohibit},	/* profile-specific additions */
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_table_element stringprep_saslprep_space_map[] = {	/* Each entry maps to 0x0020.  */
+  {0x0000A0, 0, {0x0020}},	/* 00A0; NO-BREAK SPACE */
+  {0x001680, 0, {0x0020}},	/* 1680; OGHAM SPACE MARK */
+  {0x002000, 0, {0x0020}},	/* 2000; EN QUAD */
+  {0x002001, 0, {0x0020}},	/* 2001; EM QUAD */
+  {0x002002, 0, {0x0020}},	/* 2002; EN SPACE */
+  {0x002003, 0, {0x0020}},	/* 2003; EM SPACE */
+  {0x002004, 0, {0x0020}},	/* 2004; THREE-PER-EM SPACE */
+  {0x002005, 0, {0x0020}},	/* 2005; FOUR-PER-EM SPACE */
+  {0x002006, 0, {0x0020}},	/* 2006; SIX-PER-EM SPACE */
+  {0x002007, 0, {0x0020}},	/* 2007; FIGURE SPACE */
+  {0x002008, 0, {0x0020}},	/* 2008; PUNCTUATION SPACE */
+  {0x002009, 0, {0x0020}},	/* 2009; THIN SPACE */
+  {0x00200A, 0, {0x0020}},	/* 200A; HAIR SPACE */
+  {0x00200B, 0, {0x0020}},	/* 200B; ZERO WIDTH SPACE */
+  {0x00202F, 0, {0x0020}},	/* 202F; NARROW NO-BREAK SPACE */
+  {0x00205F, 0, {0x0020}},	/* 205F; MEDIUM MATHEMATICAL SPACE */
+  {0x003000, 0, {0x0020}},	/* 3000; IDEOGRAPHIC SPACE */
+  {0}				/* All-zero last entry.  */
+};
+
+const Stringprep_profile stringprep_saslprep[] = {	/* "SASLprep" */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_saslprep_space_map},	/* profile-specific space mapping */
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_BIDI, 0, 0},
+  {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1},
+  {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2},
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}				/* All-zero last entry.  */
+};
diff -Nur t/punycode.c libidn/punycode.c
--- t/punycode.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/punycode.c	2004-03-07 21:58:01.000000000 +0100
@@ -0,0 +1,456 @@
+/* punycode.c	Implementation of punycode used to ASCII encode IDN's.
+ * Copyright (C) 2002, 2003  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * This file is derived from RFC 3492bis written by Adam M. Costello.
+ *
+ * Disclaimer and license: Regarding this entire document or any
+ * portion of it (including the pseudocode and C code), the author
+ * makes no guarantees and is not responsible for any damage resulting
+ * from its use.  The author grants irrevocable permission to anyone
+ * to use, modify, and distribute it in any way that does not diminish
+ * the rights of anyone else to use, modify, and distribute it,
+ * provided that redistributed derivative works do not contain
+ * misleading author or version information.  Derivative works need
+ * not be licensed under similar terms.
+ *
+ * Copyright (C) The Internet Society (2003).  All Rights Reserved.
+ *
+ * This document and translations of it may be copied and furnished to
+ * others, and derivative works that comment on or otherwise explain it
+ * or assist in its implementation may be prepared, copied, published
+ * and distributed, in whole or in part, without restriction of any
+ * kind, provided that the above copyright notice and this paragraph are
+ * included on all such copies and derivative works.  However, this
+ * document itself may not be modified in any way, such as by removing
+ * the copyright notice or references to the Internet Society or other
+ * Internet organizations, except as needed for the purpose of
+ * developing Internet standards in which case the procedures for
+ * copyrights defined in the Internet Standards process must be
+ * followed, or as required to translate it into languages other than
+ * English.
+ *
+ * The limited permissions granted above are perpetual and will not be
+ * revoked by the Internet Society or its successors or assigns.
+ *
+ * This document and the information contained herein is provided on an
+ * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <string.h>
+
+#include "punycode.h"
+
+/*** Bootstring parameters for Punycode (RFC 3492, section 5) ***/
+
+enum {
+  base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
+  initial_bias = 72, initial_n = 128, delimiter = 0x2D	/* ASCII '-' */
+};
+
+/* basic(cp) is true when cp, taken as a punycode_uint, is below 0x80: */
+#define basic(cp) ((punycode_uint)(cp) < 0x80)
+
+/* delim(cp) is true when cp equals the delimiter constant: */
+#define delim(cp) ((cp) == delimiter)
+
+/* Value (0..base-1) of the ASCII digit cp; base if cp is no digit.  */
+static punycode_uint
+decode_digit (punycode_uint cp)
+{
+  if (cp - 48 < 10)
+    return cp - 22;		/* '0'..'9' -> 26..35 */
+  if (cp - 65 < 26)
+    return cp - 65;		/* 'A'..'Z' -> 0..25 */
+  return cp - 97 < 26 ? cp - 97 : base;	/* 'a'..'z' -> 0..25 */
+}
+
+/* Return the ASCII character for digit value d (0..base-1): letters
+   for 0..25, '0'..'9' for 26..35.  A nonzero flag selects the
+   uppercase letter; it must be zero when d has no uppercase form.  */
+static char
+encode_digit (punycode_uint d, int flag)
+{
+  punycode_uint cp = d + 22;	/* 26..35 -> ASCII 0..9 */
+
+  if (d < 26)
+    cp += 75;			/* 0..25 -> ASCII a..z */
+  if (flag)
+    cp -= 32;			/* flagged: uppercase form */
+  return cp;
+}
+
+/* flagged(bcp) tests whether a basic code point is flagged       */
+/* (uppercase): true for the 26 values starting at 65 ('A').  The */
+/* behavior is undefined if bcp is not a basic code point.        */
+
+#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
+
+/* Return basic code point bcp with its case forced: uppercase when
+   flag is nonzero, lowercase when it is zero.  Caseless code points
+   pass through unchanged.  bcp must be a basic code point.  */
+static char
+encode_basic (punycode_uint bcp, int flag)
+{
+  if (bcp - 97 < 26)
+    bcp -= 32;			/* a..z -> A..Z */
+  if (!flag && bcp - 65 < 26)
+    bcp += 32;			/* unflagged letters end up lowercase */
+  return bcp;
+}
+
+/*** Platform-specific constants ***/
+
+/* maxint is the largest value a punycode_uint can hold.  Converting */
+/* -1 to the unsigned type wraps around to exactly that value.       */
+static const punycode_uint maxint = (punycode_uint) -1;
+
+/*** Bias adaptation: derive the next bias from the delta just coded ***/
+static punycode_uint
+adapt (punycode_uint delta, punycode_uint numpoints, int firsttime)
+{
+  punycode_uint k = 0;
+
+  if (firsttime)
+    delta /= damp;		/* extra damping on the first call */
+  else
+    delta /= 2;
+  delta += delta / numpoints;
+  while (delta > ((base - tmin) * tmax) / 2)
+    {
+      delta /= base - tmin;
+      k += base;
+    }
+  return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+/*** Main encode function ***/
+
+/**
+ * punycode_encode:
+ * @input_length: The number of code points in the @input array and
+ *   the number of flags in the @case_flags array.
+ * @input: An array of code points.  They are presumed to be Unicode
+ *   code points, but that is not strictly REQUIRED.  The array
+ *   contains code points, not code units.  UTF-16 uses code units
+ *   D800 through DFFF to refer to code points 10000..10FFFF.  The
+ *   code points D800..DFFF do not occur in any valid Unicode string.
+ *   The code points that can occur in Unicode strings (0..D7FF and
+ *   E000..10FFFF) are also called Unicode scalar values.
+ * @case_flags: A %NULL pointer or an array of boolean values parallel
+ *   to the @input array.  Nonzero (true, flagged) suggests that the
+ *   corresponding Unicode character be forced to uppercase after
+ *   being decoded (if possible), and zero (false, unflagged) suggests
+ *   that it be forced to lowercase (if possible).  ASCII code points
+ *   (0..7F) are encoded literally, except that ASCII letters are
+ *   forced to uppercase or lowercase according to the corresponding
+ *   case flags.  If @case_flags is a %NULL pointer then ASCII letters
+ *   are left as they are, and other code points are treated as
+ *   unflagged.
+ * @output_length: The caller passes in the maximum number of ASCII
+ *   code points that it can receive.  On successful return it will
+ *   contain the number of ASCII code points actually output.
+ * @output: An array of ASCII code points.  It is *not*
+ *   null-terminated; it will contain zeros if and only if the @input
+ *   contains zeros.  (Of course the caller can leave room for a
+ *   terminator and add one if needed.)
+ *
+ * Converts a sequence of code points (presumed to be Unicode code
+ * points) to Punycode.
+ *
+ * Return value: The return value can be any of the punycode_status
+ *   values except %punycode_bad_input.  If not %punycode_success,
+ *   then @output might contain garbage and @output_length is left
+ *   as the caller passed it.
+ **/
+int
+punycode_encode (size_t input_length,
+		 const punycode_uint input[],
+		 const unsigned char case_flags[],
+		 size_t * output_length, char output[])
+{
+  punycode_uint input_len, n, delta, h, b, bias, j, m, q, k, t;
+  size_t out, max_out;
+
+  /* The Punycode spec assumes that the input length is the same type */
+  /* of integer as a code point, so we need to convert the size_t to  */
+  /* a punycode_uint, which could overflow.                           */
+
+  if (input_length > maxint)
+    return punycode_overflow;
+  input_len = (punycode_uint) input_length;
+
+  /* Initialize the state: */
+
+  n = initial_n;
+  delta = 0;
+  out = 0;
+  max_out = *output_length;
+  bias = initial_bias;
+
+  /* Handle the basic code points: */
+
+  for (j = 0; j < input_len; ++j)
+    {
+      if (basic (input[j]))
+	{
+	  if (max_out - out < 2)	/* also keeps room for the delimiter */
+	    return punycode_big_output;
+	  output[out++] = case_flags ?
+	    encode_basic (input[j], case_flags[j]) : (char) input[j];
+	}
+      /* else if (input[j] < n) return punycode_bad_input; */
+      /* (not needed for Punycode with unsigned code points) */
+    }
+
+  h = b = (punycode_uint) out;
+  /* cannot overflow because out <= input_len <= maxint */
+
+  /* h is the number of code points that have been handled, b is the  */
+  /* number of basic code points, and out is the number of ASCII code */
+  /* points that have been output.                                    */
+
+  if (b > 0)
+    output[out++] = delimiter;
+
+  /* Main encoding loop: */
+
+  while (h < input_len)
+    {
+      /* All non-basic code points < n have been     */
+      /* handled already.  Find the next larger one: */
+
+      for (m = maxint, j = 0; j < input_len; ++j)
+	{
+	  /* if (basic(input[j])) continue; */
+	  /* (not needed for Punycode) */
+	  if (input[j] >= n && input[j] < m)
+	    m = input[j];
+	}
+
+      /* Increase delta enough to advance the decoder's    */
+      /* <n,i> state to <m,0>, but guard against overflow: */
+
+      if (m - n > (maxint - delta) / (h + 1))
+	return punycode_overflow;
+      delta += (m - n) * (h + 1);
+      n = m;
+
+      for (j = 0; j < input_len; ++j)
+	{
+	  /* Punycode does not need to check whether input[j] is basic: */
+	  if (input[j] < n /* || basic(input[j]) */ )
+	    {
+	      if (++delta == 0)
+		return punycode_overflow;
+	    }
+
+	  if (input[j] == n)
+	    {
+	      /* Represent delta as a generalized variable-length integer: */
+
+	      for (q = delta, k = base;; k += base)
+		{
+		  if (out >= max_out)	/* runs before every digit, the last too */
+		    return punycode_big_output;
+		  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
+		    k >= bias + tmax ? tmax : k - bias;
+		  if (q < t)
+		    break;
+		  output[out++] = encode_digit (t + (q - t) % (base - t), 0);
+		  q = (q - t) / (base - t);
+		}
+
+	      output[out++] = encode_digit (q, case_flags && case_flags[j]);
+	      bias = adapt (delta, h + 1, h == b);
+	      delta = 0;
+	      ++h;
+	    }
+	}
+
+      ++delta, ++n;
+    }
+
+  *output_length = out;
+  return punycode_success;
+}
+
+/*** Main decode function ***/
+
+/**
+ * punycode_decode:
+ * @input_length: The number of ASCII code points in the @input array.
+ * @input: An array of ASCII code points (0..7F).
+ * @output_length: The caller passes in the maximum number of code
+ *   points that it can receive into the @output array (which is also
+ *   the maximum number of flags that it can receive into the
+ *   @case_flags array, if @case_flags is not a %NULL pointer).  On
+ *   successful return it will contain the number of code points
+ *   actually output (which is also the number of flags actually
+ *   output, if @case_flags is not a %NULL pointer).  The decoder will
+ *   never need to output more code points than the number of ASCII
+ *   code points in the input, because of the way the encoding is
+ *   defined.  The number of code points output cannot exceed the
+ *   maximum possible value of a punycode_uint, even if the supplied
+ *   @output_length is greater than that.
+ * @output: An array of code points like the input argument of
+ *   punycode_encode() (see above).
+ * @case_flags: A %NULL pointer (if the flags are not needed by the
+ *   caller) or an array of boolean values parallel to the @output
+ *   array.  Nonzero (true, flagged) suggests that the corresponding
+ *   Unicode character be forced to uppercase by the caller (if
+ *   possible), and zero (false, unflagged) suggests that it be forced
+ *   to lowercase (if possible).  ASCII code points (0..7F) are output
+ *   already in the proper case, but their flags will be set
+ *   appropriately so that applying the flags would be harmless.
+ *
+ * Converts Punycode to a sequence of code points (presumed to be
+ * Unicode code points).
+ *
+ * Return value: The return value can be any of the punycode_status
+ *   values.  If not %punycode_success, then @output and @case_flags
+ *   might contain garbage; @output_length is stored on success only.
+ *
+ **/
+int
+punycode_decode (size_t input_length,
+		 const char input[],
+		 size_t * output_length,
+		 punycode_uint output[], unsigned char case_flags[])
+{
+  punycode_uint n, out, i, max_out, bias, oldi, w, k, digit, t;
+  size_t b, j, in;
+
+  /* Initialize the state: */
+
+  n = initial_n;
+  out = i = 0;
+  max_out = *output_length > maxint ? maxint
+    : (punycode_uint) * output_length;
+  bias = initial_bias;
+
+  /* Handle the basic code points:  Let b be the number of input code */
+  /* points before the last delimiter, or 0 if there is none, then    */
+  /* copy the first b code points to the output.                      */
+
+  for (b = j = 0; j < input_length; ++j)
+    if (delim (input[j]))
+      b = j;
+  if (b > max_out)		/* the basic code points alone do not fit */
+    return punycode_big_output;
+
+  for (j = 0; j < b; ++j)
+    {
+      if (case_flags)
+	case_flags[out] = flagged (input[j]);
+      if (!basic (input[j]))
+	return punycode_bad_input;
+      output[out++] = input[j];
+    }
+
+  /* Main decoding loop:  Start just after the last delimiter if any  */
+  /* basic code points were copied; start at the beginning otherwise. */
+
+  for (in = b > 0 ? b + 1 : 0; in < input_length; ++out)
+    {
+
+      /* in is the index of the next ASCII code point to be consumed, */
+      /* and out is the number of code points in the output array.    */
+
+      /* Decode a generalized variable-length integer into delta,  */
+      /* which gets added to i.  The overflow checking is easier   */
+      /* if we increase i as we go, then subtract off its starting */
+      /* value at the end to obtain delta.                         */
+
+      for (oldi = i, w = 1, k = base;; k += base)
+	{
+	  if (in >= input_length)
+	    return punycode_bad_input;
+	  digit = decode_digit (input[in++]);
+	  if (digit >= base)
+	    return punycode_bad_input;
+	  if (digit > (maxint - i) / w)
+	    return punycode_overflow;
+	  i += digit * w;
+	  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
+	    k >= bias + tmax ? tmax : k - bias;
+	  if (digit < t)
+	    break;
+	  if (w > maxint / (base - t))
+	    return punycode_overflow;
+	  w *= (base - t);
+	}
+
+      bias = adapt (i - oldi, out + 1, oldi == 0);
+
+      /* i was supposed to wrap around from out+1 to 0,   */
+      /* incrementing n each time, so we'll fix that now: */
+
+      if (i / (out + 1) > maxint - n)
+	return punycode_overflow;
+      n += i / (out + 1);
+      i %= (out + 1);
+
+      /* Insert n at position i of the output: */
+
+      /* not needed for Punycode: */
+      /* if (basic(n)) return punycode_bad_input; */
+      if (out >= max_out)
+	return punycode_big_output;
+
+      if (case_flags)
+	{
+	  memmove (case_flags + i + 1, case_flags + i, out - i);
+	  /* Case of last ASCII code point determines case flag: */
+	  case_flags[i] = flagged (input[in - 1]);
+	}
+
+      memmove (output + i + 1, output + i, (out - i) * sizeof *output);
+      output[i++] = n;
+    }
+
+  *output_length = (size_t) out;
+  /* cannot overflow because out <= old value of *output_length */
+  return punycode_success;
+}
+
+/**
+ * punycode_uint
+ *
+ * Unicode code point data type, this is always a 32 bit unsigned
+ * integer.
+ */
+
+/**
+ * Punycode_status
+ * @PUNYCODE_SUCCESS: Successful operation.  This value is guaranteed
+ *   to always be zero, the remaining ones are only guaranteed to hold
+ *   non-zero values, for logical comparison purposes.
+ * @PUNYCODE_BAD_INPUT: Input is invalid.
+ * @PUNYCODE_BIG_OUTPUT: Output would exceed the space provided.
+ * @PUNYCODE_OVERFLOW: Input needs wider integers to process.
+ *
+ * Enumerated return codes of punycode_encode() and punycode_decode().
+ * The value 0 is guaranteed to always correspond to success.
+ */
diff -Nur t/punycode.h libidn/punycode.h
--- t/punycode.h	1970-01-01 01:00:00.000000000 +0100
+++ libidn/punycode.h	2003-11-26 19:22:27.000000000 +0100
@@ -0,0 +1,216 @@
+/* punycode.h	Declarations for punycode functions.
+ * Copyright (C) 2002, 2003  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * This file is derived from RFC 3492bis written by Adam M. Costello.
+ *
+ * Disclaimer and license: Regarding this entire document or any
+ * portion of it (including the pseudocode and C code), the author
+ * makes no guarantees and is not responsible for any damage resulting
+ * from its use.  The author grants irrevocable permission to anyone
+ * to use, modify, and distribute it in any way that does not diminish
+ * the rights of anyone else to use, modify, and distribute it,
+ * provided that redistributed derivative works do not contain
+ * misleading author or version information.  Derivative works need
+ * not be licensed under similar terms.
+ *
+ * Copyright (C) The Internet Society (2003).  All Rights Reserved.
+ *
+ * This document and translations of it may be copied and furnished to
+ * others, and derivative works that comment on or otherwise explain it
+ * or assist in its implementation may be prepared, copied, published
+ * and distributed, in whole or in part, without restriction of any
+ * kind, provided that the above copyright notice and this paragraph are
+ * included on all such copies and derivative works.  However, this
+ * document itself may not be modified in any way, such as by removing
+ * the copyright notice or references to the Internet Society or other
+ * Internet organizations, except as needed for the purpose of
+ * developing Internet standards in which case the procedures for
+ * copyrights defined in the Internet Standards process must be
+ * followed, or as required to translate it into languages other than
+ * English.
+ *
+ * The limited permissions granted above are perpetual and will not be
+ * revoked by the Internet Society or its successors or assigns.
+ *
+ * This document and the information contained herein is provided on an
+ * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef _PUNYCODE_H
+#define _PUNYCODE_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>		/* size_t */
+#include <stdint.h>		/* uint32_t */
+
+  enum punycode_status
+  {
+    punycode_success = 0,	/* Successful operation; always zero.      */
+    punycode_bad_input = 1,	/* Input is invalid.                       */
+    punycode_big_output = 2,	/* Output would exceed the space provided. */
+    punycode_overflow = 3	/* Wider integers needed to process input. */
+  };
+
+  typedef enum			/* Upper-case aliases of enum punycode_status.  */
+  {
+    PUNYCODE_SUCCESS = punycode_success,
+    PUNYCODE_BAD_INPUT = punycode_bad_input,
+    PUNYCODE_BIG_OUTPUT = punycode_big_output,
+    PUNYCODE_OVERFLOW = punycode_overflow
+  } Punycode_status;
+
+/* punycode_uint needs to be unsigned and needs to be */
+/* at least 26 bits wide.                             */
+
+  typedef uint32_t punycode_uint;
+
+  extern int punycode_encode (size_t input_length,
+			      const punycode_uint input[],
+			      const unsigned char case_flags[],
+			      size_t * output_length, char output[]);
+
+/*
+    punycode_encode() converts a sequence of code points (presumed to be
+    Unicode code points) to Punycode.
+
+    Input arguments (to be supplied by the caller):
+
+        input_length
+            The number of code points in the input array and the number
+            of flags in the case_flags array.
+
+        input
+            An array of code points.  They are presumed to be Unicode
+            code points, but that is not strictly REQUIRED.  The
+            array contains code points, not code units.  UTF-16 uses
+            code units D800 through DFFF to refer to code points
+            10000..10FFFF.  The code points D800..DFFF do not occur in
+            any valid Unicode string.  The code points that can occur in
+            Unicode strings (0..D7FF and E000..10FFFF) are also called
+            Unicode scalar values.
+
+        case_flags
+            A null pointer or an array of boolean values parallel to
+            the input array.  Nonzero (true, flagged) suggests that the
+            corresponding Unicode character be forced to uppercase after
+            being decoded (if possible), and zero (false, unflagged)
+            suggests that it be forced to lowercase (if possible).
+            ASCII code points (0..7F) are encoded literally, except that
+            ASCII letters are forced to uppercase or lowercase according
+            to the corresponding case flags.  If case_flags is a null
+            pointer then ASCII letters are left as they are, and other
+            code points are treated as unflagged.
+
+    Output arguments (to be filled in by the function):
+
+        output
+            An array of ASCII code points.  It is *not* null-terminated;
+            it will contain zeros if and only if the input contains
+            zeros.  (Of course the caller can leave room for a
+            terminator and add one if needed.)
+
+    Input/output arguments (to be supplied by the caller and overwritten
+    by the function):
+
+        output_length
+            The caller passes in the maximum number of ASCII code points
+            that it can receive.  On successful return it will contain
+            the number of ASCII code points actually output.
+
+    Return value:
+
+        Can be any of the punycode_status values defined above except
+        punycode_bad_input.  If not punycode_success, then output_length
+        and output might contain garbage.
+*/
+
+  extern int punycode_decode (size_t input_length,
+			      const char input[],
+			      size_t * output_length,
+			      punycode_uint output[],
+			      unsigned char case_flags[]);
+
+/*
+    punycode_decode() converts Punycode to a sequence of code points
+    (presumed to be Unicode code points).
+
+    Input arguments (to be supplied by the caller):
+
+        input_length
+            The number of ASCII code points in the input array.
+
+        input
+            An array of ASCII code points (0..7F).
+
+    Output arguments (to be filled in by the function):
+
+        output
+            An array of code points like the input argument of
+            punycode_encode() (see above).
+
+        case_flags
+            A null pointer (if the flags are not needed by the caller)
+            or an array of boolean values parallel to the output array.
+            Nonzero (true, flagged) suggests that the corresponding
+            Unicode character be forced to uppercase by the caller (if
+            possible), and zero (false, unflagged) suggests that it
+            be forced to lowercase (if possible).  ASCII code points
+            (0..7F) are output already in the proper case, but their
+            flags will be set appropriately so that applying the flags
+            would be harmless.
+
+    Input/output arguments (to be supplied by the caller and overwritten
+    by the function):
+
+        output_length
+            The caller passes in the maximum number of code points
+            that it can receive into the output array (which is also
+            the maximum number of flags that it can receive into the
+            case_flags array, if case_flags is not a null pointer).  On
+            successful return it will contain the number of code points
+            actually output (which is also the number of flags actually
+            output, if case_flags is not a null pointer).  The decoder
+            will never need to output more code points than the number
+            of ASCII code points in the input, because of the way the
+            encoding is defined.  The number of code points output
+            cannot exceed the maximum possible value of a punycode_uint,
+            even if the supplied output_length is greater than that.
+
+    Return value:
+
+        Can be any of the punycode_status values defined above.  If not
+        punycode_success, then output_length, output, and case_flags
+        might contain garbage.
+*/
+
+#ifdef __cplusplus
+}
+#endif
+#endif				/* _PUNYCODE_H */
diff -Nur t/stringprep.c libidn/stringprep.c
--- t/stringprep.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/stringprep.c	2004-03-07 22:57:17.000000000 +0100
@@ -0,0 +1,667 @@
+/* stringprep.c --- Core stringprep implementation.
+ * Copyright (C) 2002, 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "stringprep.h"
+
+/* Return the index of the first entry of TABLE whose code point range
+   contains UCS4, or -1 if there is none.  TABLE ends at the first entry
+   with a zero start; a zero end makes the range just the start value.  */
+static ssize_t
+stringprep_find_character_in_table (uint32_t ucs4,
+				    const Stringprep_table_element * table)
+{
+  ssize_t idx;
+
+  /* Deliberately a linear scan, although it dominates libidn's CPU time:
+     evidence that a binary search pays off in practice is wanted first.  */
+  for (idx = 0; table[idx].start; idx++)
+    {
+      if (ucs4 < table[idx].start)
+	continue;
+      if (ucs4 <= (table[idx].end ? table[idx].end : table[idx].start))
+	return idx;
+    }
+
+  return -1;
+}
+
+/* Return the index of the first of the UCS4LEN code points in UCS4 found
+   in TABLE (its table index goes to *TABLEPOS if non-NULL), else -1.  */
+static ssize_t
+stringprep_find_string_in_table (uint32_t * ucs4, size_t ucs4len,
+				 size_t * tablepos,
+				 const Stringprep_table_element * table)
+{
+  size_t scanned = 0;
+  ssize_t hit = -1;
+
+  while (hit == -1 && scanned < ucs4len)
+    hit = stringprep_find_character_in_table (ucs4[scanned++], table);
+
+  if (hit == -1)
+    return -1;
+  if (tablepos)
+    *tablepos = hit;
+  return scanned - 1;
+}
+
+/* Replace in place each code point of UCS4 found in TABLE by the entry's
+   map minus trailing zeros, rescanning from the start after each one.  */
+static int
+stringprep_apply_table_to_string (uint32_t * ucs4, size_t * ucs4len,
+				  size_t maxucs4len,
+				  const Stringprep_table_element * table)
+{
+  size_t entry, maplen;
+  ssize_t pos;
+
+  for (;;)
+    {
+      pos = stringprep_find_string_in_table (ucs4, *ucs4len, &entry, table);
+      if (pos == -1)
+	return STRINGPREP_OK;
+      maplen = STRINGPREP_MAX_MAP_CHARS;
+      while (maplen > 0 && table[entry].map[maplen - 1] == 0)
+	maplen--;
+      if (*ucs4len - 1 + maplen >= maxucs4len)
+	return STRINGPREP_TOO_SMALL_BUFFER;
+
+      memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
+	       sizeof (uint32_t) * (*ucs4len - pos - 1));
+      memcpy (&ucs4[pos], table[entry].map, sizeof (uint32_t) * maplen);
+      *ucs4len += maplen - 1;
+    }
+}
+
+#define INVERTED(x) ((x) & ((~0UL) >> 1))	/* x with unsigned long's top bit cleared; NOTE(review): confirm intent.  */
+#define UNAPPLICAPLEFLAGS(flags, profileflags) \
+  ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
+   ( INVERTED(profileflags) && (profileflags & flags)))
+
+/**
+ * stringprep_4i:
+ * @ucs4: input/output array with string to prepare.
+ * @len: on input, length of input array with Unicode code points,
+ *          on successful exit, length of output array in code points.
+ * @maxucs4len: maximum length of input/output array.
+ * @flags: stringprep profile flags, or 0.
+ * @profile: pointer to stringprep profile to use.
+ *
+ * Prepare the input UCS-4 string according to the stringprep profile,
+ * and write back the result to the input string.
+ *
+ * The input is not required to be zero terminated (@ucs4[@len] = 0).
+ * The output will not be zero terminated unless @ucs4[@len] = 0.
+ * Instead, see stringprep_4zi() if your input is zero terminated or
+ * if you want the output to be.
+ *
+ * Since the stringprep operation can expand the string, @maxucs4len
+ * indicates how large the buffer holding the string is.  This function
+ * will not read or write to code points outside that size.
+ *
+ * The @flags are one of Stringprep_profile_flags, or 0.
+ *
+ * The @profile contains the instructions to perform.  Your application
+ * can define new profiles, possibly re-using the generic stringprep
+ * tables that always will be part of the library, or use one of the
+ * currently supported profiles.
+ *
+ * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
+ **/
+int
+stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
+	       Stringprep_profile_flags flags,
+	       const Stringprep_profile * profile)
+{
+  size_t i, j;
+  ssize_t k;
+  size_t ucs4len = *len;	/* Working length; stored to *len only on success.  */
+  int rc;
+
+  for (i = 0; profile[i].operation; i++)	/* Zero operation ends profile.  */
+    {
+      switch (profile[i].operation)
+	{
+	case STRINGPREP_NFKC:
+	  {
+	    uint32_t *q = 0;
+
+	    if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
+	      break;
+
+	    if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
+	      /* Profile requires NFKC, but caller asked for no NFKC. */
+	      return STRINGPREP_FLAG_ERROR;
+
+	    q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
+	    if (!q)
+	      return STRINGPREP_NFKC_FAILED;
+
+	    for (ucs4len = 0; q[ucs4len]; ucs4len++)
+	      ;			/* Count up to q's zero terminator.  */
+
+	    if (ucs4len >= maxucs4len)
+	      {
+		free (q);
+		return STRINGPREP_TOO_SMALL_BUFFER;
+	      }
+
+	    memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
+
+	    free (q);
+	  }
+	  break;
+
+	case STRINGPREP_PROHIBIT_TABLE:
+	  k = stringprep_find_string_in_table (ucs4, ucs4len,
+					       NULL, profile[i].table);
+	  if (k != -1)
+	    return STRINGPREP_CONTAINS_PROHIBITED;
+	  break;
+
+	case STRINGPREP_UNASSIGNED_TABLE:
+	  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
+	    break;
+	  if (flags & STRINGPREP_NO_UNASSIGNED)
+	    {
+	      k = stringprep_find_string_in_table
+		(ucs4, ucs4len, NULL, profile[i].table);
+	      if (k != -1)
+		return STRINGPREP_CONTAINS_UNASSIGNED;
+	    }
+	  break;
+
+	case STRINGPREP_MAP_TABLE:
+	  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
+	    break;
+	  rc = stringprep_apply_table_to_string
+	    (ucs4, &ucs4len, maxucs4len, profile[i].table);
+	  if (rc != STRINGPREP_OK)
+	    return rc;
+	  break;
+
+	case STRINGPREP_BIDI_PROHIBIT_TABLE:
+	case STRINGPREP_BIDI_RAL_TABLE:
+	case STRINGPREP_BIDI_L_TABLE:
+	  break;		/* Consumed by the STRINGPREP_BIDI step.  */
+
+	case STRINGPREP_BIDI:
+	  {
+	    int done_prohibited = 0;
+	    int done_ral = 0;
+	    int done_l = 0;
+	    int contains_ral = -1;
+	    int contains_l = -1;
+
+	    for (j = 0; profile[j].operation; j++)
+	      if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
+		{
+		  done_prohibited = 1;
+		  k = stringprep_find_string_in_table (ucs4, ucs4len,
+						       NULL,
+						       profile[j].table);
+		  if (k != -1)
+		    return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
+		}
+	      else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
+		{
+		  done_ral = 1;
+		  if (stringprep_find_string_in_table
+		      (ucs4, ucs4len, NULL, profile[j].table) != -1)
+		    contains_ral = j;
+		}
+	      else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
+		{
+		  done_l = 1;
+		  if (stringprep_find_string_in_table
+		      (ucs4, ucs4len, NULL, profile[j].table) != -1)
+		    contains_l = j;
+		}
+
+	    if (!done_prohibited || !done_ral || !done_l)
+	      return STRINGPREP_PROFILE_ERROR;
+
+	    if (contains_ral != -1 && contains_l != -1)
+	      return STRINGPREP_BIDI_BOTH_L_AND_RAL;
+
+	    if (contains_ral != -1)
+	      {
+		if (!(stringprep_find_character_in_table
+		      (ucs4[0], profile[contains_ral].table) != -1 &&
+		      stringprep_find_character_in_table
+		      (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
+		  return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
+	      }
+	  }
+	  break;
+
+	default:
+	  return STRINGPREP_PROFILE_ERROR;
+	  break;
+	}
+    }
+
+  *len = ucs4len;
+
+  return STRINGPREP_OK;
+}
+
+/* Run stringprep_4i() on the first UCS4LEN code points of UCS4 and, on
+   success, zero terminate the result.  Fails with
+   STRINGPREP_TOO_SMALL_BUFFER when no slot is left for the terminator.  */
+static int
+stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
+		  Stringprep_profile_flags flags,
+		  const Stringprep_profile * profile)
+{
+  int status = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
+
+  if (status == STRINGPREP_OK)
+    {
+      if (ucs4len >= maxucs4len)
+	return STRINGPREP_TOO_SMALL_BUFFER;
+      ucs4[ucs4len] = 0;
+    }
+  return status;
+}
+
+/**
+ * stringprep_4zi:
+ * @ucs4: input/output array with zero terminated string to prepare.
+ * @maxucs4len: maximum length of input/output array.
+ * @flags: stringprep profile flags, or 0.
+ * @profile: pointer to stringprep profile to use.
+ *
+ * Apply the stringprep profile to a zero terminated UCS-4 string in
+ * place; the prepared string is zero terminated as well.
+ *
+ * The result may be longer than the input, so @maxucs4len gives the
+ * capacity of the buffer in code points.  Nothing beyond it is read or
+ * written, and the terminating zero has to fit within it too.
+ *
+ * The @flags are one of Stringprep_profile_flags, or 0.
+ *
+ * The @profile contains the instructions to perform.  Applications
+ * may define their own profiles, possibly re-using the generic
+ * stringprep tables shipped with the library, or pick one of the
+ * currently supported profiles.
+ *
+ * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
+ **/
+int
+stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
+		Stringprep_profile_flags flags,
+		const Stringprep_profile * profile)
+{
+  size_t n = 0;
+
+  while (n < maxucs4len && ucs4[n] != 0)
+    n++;
+
+  return stringprep_4zi_1 (ucs4, n, maxucs4len, flags, profile);
+}
+
+/**
+ * stringprep:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ * @flags: stringprep profile flags (see Stringprep_profile_flags), or 0.
+ * @profile: pointer to stringprep profile to use.
+ *
+ * Prepare the input zero terminated UTF-8 string according to the
+ * stringprep profile, and write back the result to the input string.
+ *
+ * Note that you must convert strings entered in the system's locale
+ * into UTF-8 before using this function, see
+ * stringprep_locale_to_utf8().
+ *
+ * Since the stringprep operation can expand the string, @maxlen
+ * indicates how large the buffer holding the string is.  This function
+ * will not read or write to characters outside that size.
+ *
+ * The @profile contains the instructions to perform.  Applications can
+ * define new profiles, possibly re-using the generic stringprep tables
+ * that are always part of the library, or use a supported profile.
+ *
+ * Return value: Returns %STRINGPREP_OK iff successful, or an error code
+ * (%STRINGPREP_MALLOC_ERROR when a working buffer cannot be allocated).
+ **/
+int
+stringprep (char *in,
+	    size_t maxlen,
+	    Stringprep_profile_flags flags,
+	    const Stringprep_profile * profile)
+{
+  int rc;
+  char *utf8 = NULL;
+  uint32_t *ucs4 = NULL, *grown;
+  size_t ucs4len, maxucs4len, adducs4len = 50;
+
+  do
+    {
+      free (ucs4);		/* Drop the buffer of a too-small attempt.  */
+      ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
+      if (!ucs4)		/* Conversion failed; ucs4len is not usable.  */
+	return STRINGPREP_MALLOC_ERROR;
+      maxucs4len = ucs4len + adducs4len;
+      grown = realloc (ucs4, maxucs4len * sizeof (uint32_t));
+      if (!grown)
+	{
+	  free (ucs4);		/* realloc failure leaves the old block live.  */
+	  return STRINGPREP_MALLOC_ERROR;
+	}
+      ucs4 = grown;
+      rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
+      adducs4len += 50;		/* Next retry gets 50 more code points.  */
+    }
+  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
+  if (rc != STRINGPREP_OK)
+    {
+      free (ucs4);
+      return rc;
+    }
+
+  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
+  free (ucs4);
+  if (!utf8)
+    return STRINGPREP_MALLOC_ERROR;
+
+  if (strlen (utf8) >= maxlen)
+    {
+      free (utf8);
+      return STRINGPREP_TOO_SMALL_BUFFER;
+    }
+
+  strcpy (in, utf8);		/* flawfinder: ignore */
+  free (utf8);
+  return STRINGPREP_OK;
+}
+
+/**
+ * stringprep_profile:
+ * @in: input array with UTF-8 string to prepare.
+ * @out: output variable with pointer to newly allocated string.
+ * @profile: name of stringprep profile to use.
+ * @flags: stringprep profile flags, or 0.
+ *
+ * Run the named stringprep profile over a zero terminated UTF-8
+ * string, leaving the input untouched and handing back the result in
+ * freshly allocated memory.
+ *
+ * Strings in the system's locale encoding have to be converted to
+ * UTF-8 first, see stringprep_locale_to_utf8().
+ *
+ * On success @out points to the prepared string, which the caller
+ * must deallocate.  On failure @out is not modified.
+ *
+ * The @flags are one of Stringprep_profile_flags, or 0.
+ *
+ * The @profile must name one of the internally supported stringprep
+ * profiles, otherwise %STRINGPREP_UNKNOWN_PROFILE is returned.
+ *
+ * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
+ **/
+int
+stringprep_profile (const char *in,
+		    char **out,
+		    const char *profile, Stringprep_profile_flags flags)
+{
+  const Stringprep_profiles *entry = &stringprep_profiles[0];
+  size_t bufsize = strlen (in) + 1;
+  char *buf = NULL;
+  int status;
+
+  /* The table of known profiles ends with an entry without a name.  */
+  while (entry->name && strcmp (entry->name, profile) != 0)
+    entry++;
+
+  if (!entry->name || !entry->tables)
+    return STRINGPREP_UNKNOWN_PROFILE;
+
+  /* Retry with 50 more bytes for as long as the result does not fit.  */
+  for (;;)
+    {
+      free (buf);
+      buf = malloc (bufsize);
+      if (buf == NULL)
+	return STRINGPREP_MALLOC_ERROR;
+
+      strcpy (buf, in);
+      status = stringprep (buf, bufsize, flags, entry->tables);
+      bufsize += 50;
+      if (status != STRINGPREP_TOO_SMALL_BUFFER)
+	break;
+    }
+
+  if (status != STRINGPREP_OK)
+    free (buf);
+  else
+    *out = buf;
+
+  return status;
+}
+
+/*! \mainpage GNU Internationalized Domain Name Library
+ *
+ * \section intro Introduction
+ *
+ * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA
+ * specifications defined by the IETF Internationalized Domain Names
+ * (IDN) working group, used for internationalized domain names.  The
+ * package is available under the GNU Lesser General Public License.
+ *
+ * The library contains a generic Stringprep implementation that does
+ * Unicode 3.2 NFKC normalization, mapping and prohibition of
+ * characters, and bidirectional character handling.  Profiles for
+ * Nameprep, iSCSI, SASL and XMPP are included.  Punycode and ASCII
+ * Compatible Encoding (ACE) via IDNA are supported.  A mechanism to
+ * define Top-Level Domain (TLD) specific validation tables, and to
+ * compare strings against those tables, is included.  Default tables
+ * for some TLDs are also included.
+ *
+ * The Stringprep API consists of two main functions, one for
+ * converting data from the system's native representation into UTF-8,
+ * and one function to perform the Stringprep processing.  Adding a
+ * new Stringprep profile for your application within the API is
+ * straightforward.  The Punycode API consists of one encoding
+ * function and one decoding function.  The IDNA API consists of the
+ * ToASCII and ToUnicode functions, as well as a high-level interface
+ * for converting entire domain names to and from the ACE encoded
+ * form.  The TLD API consists of one set of functions to extract the
+ * TLD name from a domain string, one set of functions to locate the
+ * proper TLD table to use based on the TLD name, and core functions
+ * to validate a string against a TLD table, and some utility wrappers
+ * to perform all the steps in one call.
+ *
+ * The library is used by, e.g., GNU SASL and Shishi to process user
+ * names and passwords.  Libidn can be built into GNU Libc to enable a
+ * new system-wide getaddrinfo() flag for IDN processing.
+ *
+ * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix
+ * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows.
+ * Libidn is written in C and (parts of) the API is accessible from C,
+ * C++, Emacs Lisp, Python and Java.
+ *
+ * The project web page:\n
+ * http://www.gnu.org/software/libidn/
+ *
+ * The software archive:\n
+ * ftp://alpha.gnu.org/pub/gnu/libidn/
+ *
+ * For more information see:\n
+ * http://www.ietf.org/html.charters/idn-charter.html\n
+ * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n
+ * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n
+ * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n
+ * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n
+ * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n
+ *
+ * Further information and paid contract development:\n
+ * Simon Josefsson <simon@josefsson.org>
+ *
+ * \section examples Examples
+ *
+ * \include example.c
+ * \include example3.c
+ * \include example4.c
+ * \include example5.c
+ */
+
+/**
+ * STRINGPREP_VERSION
+ *
+ * String defined via CPP denoting the header file version number.
+ * Used together with stringprep_check_version() to verify header file
+ * and run-time library consistency.
+ */
+
+/**
+ * STRINGPREP_MAX_MAP_CHARS
+ *
+ * Maximum number of code points that can replace a single code point,
+ * during stringprep mapping.
+ */
+
+/**
+ * Stringprep_rc:
+ * @STRINGPREP_OK: Successful operation.  This value is guaranteed to
+ *   always be zero, the remaining ones are only guaranteed to hold
+ *   non-zero values, for logical comparison purposes.
+ * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode
+ *   code points, which is forbidden by the profile.
+ * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points
+ *   prohibited by the profile.
+ * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with
+ *   conflicting bidirectional category.
+ * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character
+ *   in string not of proper bidirectional category.
+ * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code
+ *   points detected by bidirectional code.
+ * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too
+ *   small.  This usually indicates a problem in the calling
+ *   application.
+ * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent.
+ *   This usually indicates an internal error in the library.
+ * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile.
+ *   This usually indicates a problem in the calling application.
+ * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
+ *   known to the library.
+ * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed.  This
+ *   usually indicates an internal error in the library.
+ * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory.  This is
+ *   usually a fatal error.
+ *
+ * Enumerated return codes of stringprep(), stringprep_profile()
+ * functions (and macros using those functions).  The value 0 is
+ * guaranteed to always correspond to success.
+ */
+
+/**
+ * Stringprep_profile_flags:
+ * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
+ *   selecting the non-NFKC case folding tables.  Usually the profile
+ *   specifies BIDI and NFKC settings, and applications should not
+ *   override it unless in special situations.
+ * @STRINGPREP_NO_BIDI: Disable the BIDI step.  Usually the profile
+ *   specifies BIDI and NFKC settings, and applications should not
+ *   override it unless in special situations.
+ * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
+ *   string contains unassigned characters according to profile.
+ *
+ * Stringprep profile flags.
+ */
+
+/**
+ * Stringprep_profile_steps:
+ *
+ * Various steps in the stringprep algorithm.  You really want to
+ * study the source code to understand this one.  Only useful if you
+ * want to add another profile.
+ */
+
+/**
+ * stringprep_nameprep:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the nameprep profile.
+ * The AllowUnassigned flag is true, use
+ * stringprep_nameprep_no_unassigned() if you want a false
+ * AllowUnassigned.  Returns 0 iff successful, or an error code.
+ **/
+
+/**
+ * stringprep_nameprep_no_unassigned:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the nameprep profile.
+ * The AllowUnassigned flag is false, use stringprep_nameprep() for
+ * true AllowUnassigned.  Returns 0 iff successful, or an error code.
+ **/
+
+/**
+ * stringprep_iscsi:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the draft iSCSI
+ * stringprep profile.  Returns 0 iff successful, or an error code.
+ **/
+
+/**
+ * stringprep_plain:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the draft SASL
+ * ANONYMOUS profile.  Returns 0 iff successful, or an error code.
+ **/
+
+/**
+ * stringprep_xmpp_nodeprep:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the draft XMPP node
+ * identifier profile.  Returns 0 iff successful, or an error code.
+ **/
+
+/**
+ * stringprep_xmpp_resourceprep:
+ * @in: input/output array with string to prepare.
+ * @maxlen: maximum length of input/output array.
+ *
+ * Prepare the input UTF-8 string according to the draft XMPP resource
+ * identifier profile.  Returns 0 iff successful, or an error code.
+ **/
diff -Nur t/stringprep.h libidn/stringprep.h
--- t/stringprep.h	1970-01-01 01:00:00.000000000 +0100
+++ libidn/stringprep.h	2004-03-07 22:57:28.000000000 +0100
@@ -0,0 +1,211 @@
+/* stringprep.h		Header file for stringprep functions.         -*- c -*-
+ * Copyright (C) 2002, 2003  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _STRINGPREP_H
+#define _STRINGPREP_H
+
+#include <stddef.h>		/* size_t */
+#include <unistd.h>		/* ssize_t */
+#include <idn-int.h>		/* uint32_t */
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define STRINGPREP_VERSION "0.4.1"
+
+/* Error codes. */
+  typedef enum
+  {
+    STRINGPREP_OK = 0,
+    /* Stringprep errors. */
+    STRINGPREP_CONTAINS_UNASSIGNED = 1,
+    STRINGPREP_CONTAINS_PROHIBITED = 2,
+    STRINGPREP_BIDI_BOTH_L_AND_RAL = 3,
+    STRINGPREP_BIDI_LEADTRAIL_NOT_RAL = 4,
+    STRINGPREP_BIDI_CONTAINS_PROHIBITED = 5,
+    /* Error in calling application. */
+    STRINGPREP_TOO_SMALL_BUFFER = 100,
+    STRINGPREP_PROFILE_ERROR = 101,
+    STRINGPREP_FLAG_ERROR = 102,
+    STRINGPREP_UNKNOWN_PROFILE = 103,
+    /* Internal errors. */
+    STRINGPREP_NFKC_FAILED = 200,
+    STRINGPREP_MALLOC_ERROR = 201
+  } Stringprep_rc;
+
+/* Flags used when calling stringprep(). */
+  typedef enum
+  {
+    STRINGPREP_NO_NFKC = 1,
+    STRINGPREP_NO_BIDI = 2,
+    STRINGPREP_NO_UNASSIGNED = 4
+  } Stringprep_profile_flags;
+
+/* Steps in a stringprep profile. */
+  typedef enum
+  {
+    STRINGPREP_NFKC = 1,
+    STRINGPREP_BIDI = 2,
+    STRINGPREP_MAP_TABLE = 3,
+    STRINGPREP_UNASSIGNED_TABLE = 4,
+    STRINGPREP_PROHIBIT_TABLE = 5,
+    STRINGPREP_BIDI_PROHIBIT_TABLE = 6,
+    STRINGPREP_BIDI_RAL_TABLE = 7,
+    STRINGPREP_BIDI_L_TABLE = 8
+  } Stringprep_profile_steps;
+
+#define STRINGPREP_MAX_MAP_CHARS 4
+
+  struct Stringprep_table_element
+  {
+    uint32_t start;
+    uint32_t end;		/* 0 if only one character */
+    uint32_t map[STRINGPREP_MAX_MAP_CHARS];	/* NULL if end is not 0 */
+  };
+  typedef struct Stringprep_table_element Stringprep_table_element;
+
+  struct Stringprep_table
+  {
+    Stringprep_profile_steps operation;
+    Stringprep_profile_flags flags;
+    const Stringprep_table_element *table;
+  };
+  typedef struct Stringprep_table Stringprep_profile;
+
+  struct Stringprep_profiles
+  {
+    char *name;
+    const Stringprep_profile *tables;
+  };
+  typedef struct Stringprep_profiles Stringprep_profiles;
+
+  extern const Stringprep_profiles stringprep_profiles[];
+
+/* Profiles */
+  extern const Stringprep_table_element stringprep_rfc3454_A_1[];
+  extern const Stringprep_table_element stringprep_rfc3454_B_1[];
+  extern const Stringprep_table_element stringprep_rfc3454_B_2[];
+  extern const Stringprep_table_element stringprep_rfc3454_B_3[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_1_1[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_1_2[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_2_1[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_2_2[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_3[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_4[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_5[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_6[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_7[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_8[];
+  extern const Stringprep_table_element stringprep_rfc3454_C_9[];
+  extern const Stringprep_table_element stringprep_rfc3454_D_1[];
+  extern const Stringprep_table_element stringprep_rfc3454_D_2[];
+
+  /* Nameprep */
+
+  extern const Stringprep_profile stringprep_nameprep[];
+
+#define stringprep_nameprep(in, maxlen)			\
+  stringprep(in, maxlen, 0, stringprep_nameprep)
+
+#define stringprep_nameprep_no_unassigned(in, maxlen)			\
+  stringprep(in, maxlen, STRINGPREP_NO_UNASSIGNED, stringprep_nameprep)
+
+  /* SASL */
+
+  extern const Stringprep_profile stringprep_saslprep[];
+  extern const Stringprep_profile stringprep_plain[];
+  extern const Stringprep_profile stringprep_trace[];
+
+#define stringprep_plain(in, maxlen)		\
+  stringprep(in, maxlen, 0, stringprep_plain)
+
+  /* Kerberos */
+
+  extern const Stringprep_profile stringprep_kerberos5[];
+
+#define stringprep_kerberos5(in, maxlen)		\
+  stringprep(in, maxlen, 0, stringprep_kerberos5)
+
+  /* XMPP */
+
+  extern const Stringprep_profile stringprep_xmpp_nodeprep[];
+  extern const Stringprep_profile stringprep_xmpp_resourceprep[];
+  extern const Stringprep_table_element stringprep_xmpp_nodeprep_prohibit[];
+
+#define stringprep_xmpp_nodeprep(in, maxlen)		\
+  stringprep(in, maxlen, 0, stringprep_xmpp_nodeprep)
+#define stringprep_xmpp_resourceprep(in, maxlen)		\
+  stringprep(in, maxlen, 0, stringprep_xmpp_resourceprep)
+
+  /* iSCSI */
+
+  extern const Stringprep_profile stringprep_iscsi[];
+
+#define stringprep_iscsi(in, maxlen)		\
+  stringprep(in, maxlen, 0, stringprep_iscsi)
+
+  /* API */
+
+  extern int stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
+			    Stringprep_profile_flags flags,
+			    const Stringprep_profile * profile);
+  extern int stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
+			     Stringprep_profile_flags flags,
+			     const Stringprep_profile * profile);
+  extern int stringprep (char *in, size_t maxlen,
+			 Stringprep_profile_flags flags,
+			 const Stringprep_profile * profile);
+
+  extern int stringprep_profile (const char *in,
+				 char **out,
+				 const char *profile,
+				 Stringprep_profile_flags flags);
+
+  extern const char *stringprep_check_version (const char *req_version);
+
+/* Utility */
+
+  extern int stringprep_unichar_to_utf8 (uint32_t c, char *outbuf);
+  extern uint32_t stringprep_utf8_to_unichar (const char *p);
+
+  extern uint32_t *stringprep_utf8_to_ucs4 (const char *str, ssize_t len,
+					    size_t * items_written);
+  extern char *stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len,
+					size_t * items_read,
+					size_t * items_written);
+
+  extern char *stringprep_utf8_nfkc_normalize (const char *str, ssize_t len);
+  extern uint32_t *stringprep_ucs4_nfkc_normalize (uint32_t * str,
+						   ssize_t len);
+
+  extern const char *stringprep_locale_charset (void);
+  extern char *stringprep_convert (const char *str,
+				   const char *to_codeset,
+				   const char *from_codeset);
+  extern char *stringprep_locale_to_utf8 (const char *str);
+  extern char *stringprep_utf8_to_locale (const char *str);
+
+#ifdef __cplusplus
+}
+#endif
+#endif				/* _STRINGPREP_H */
diff -Nur t/toutf8.c libidn/toutf8.c
--- t/toutf8.c	1970-01-01 01:00:00.000000000 +0100
+++ libidn/toutf8.c	2004-03-07 23:07:01.000000000 +0100
@@ -0,0 +1,270 @@
+/* toutf8.c	Convert strings from system locale into UTF-8.
+ * Copyright (C) 2002, 2003, 2004  Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "stringprep.h"
+
+#if 0
+# define HAVE_ICONV 1
+# define LOCALE_WORKS 1
+# define ICONV_CONST
+#endif
+
+#ifdef HAVE_ICONV
+# include <iconv.h>
+
+# if LOCALE_WORKS
+#  include <langinfo.h>
+#  include <locale.h>
+# endif
+
+/* Determine the system charset: $CHARSET, else the environment's locale.  */
+static const char *
+stringprep_locale_charset_slow (void)
+{
+  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */
+  if (charset && *charset)
+    return charset;
+# if LOCALE_WORKS
+  {
+    /* A later setlocale() may overwrite both results, so copy them; the
+       charset copy lives on in the caller's cache and is never freed.  */
+    char *saved = setlocale (LC_CTYPE, NULL);
+    if (saved != NULL && (saved = strdup (saved)) != NULL)
+      {
+	setlocale (LC_CTYPE, "");
+	charset = nl_langinfo (CODESET);
+	charset = (charset && *charset) ? strdup (charset) : NULL;
+	setlocale (LC_CTYPE, saved);
+	free (saved);
+	if (charset)
+	  return charset;
+      }
+  }
+# endif
+  return "ASCII";
+}
+
+static const char *stringprep_locale_charset_cache = NULL;
+
+/**
+ * stringprep_locale_charset:
+ *
+ * Find out system locale charset.
+ *
+ * Note that this function returns what it believes the SYSTEM is
+ * using as a locale, not the locale the program is currently in
+ * (as modified, e.g., by a setlocale(LC_CTYPE, "ISO-8859-1")).  Data
+ * read from argv[], stdin etc comes from the system, and is more
+ * likely to be encoded in the system locale than the program locale.
+ *
+ * The environment variable CHARSET overrides the value returned; as
+ * the result is cached, set it before calling any stringprep function.
+ *
+ * Return value: Return the character set used by the system locale.
+ *   It will never return NULL, but use "ASCII" as a fallback.
+ **/
+const char *
+stringprep_locale_charset (void)
+{
+  const char *charset = stringprep_locale_charset_cache;
+
+  if (charset == NULL)
+    {
+      charset = stringprep_locale_charset_slow ();
+      stringprep_locale_charset_cache = charset;
+    }
+  return charset;
+}
+
+/**
+ * stringprep_convert:
+ * @str: input zero-terminated string.
+ * @to_codeset: name of destination character set.
+ * @from_codeset: name of origin character set, as used by @str.
+ *
+ * Convert the string from one character set to another using the
+ * system's iconv() function.
+ *
+ * Return value: Returns newly allocated zero-terminated string which
+ *   is @str transcoded into to_codeset, or NULL if the conversion is
+ *   unsupported, the input cannot be fully converted, or memory
+ *   allocation fails.  The caller must free() the result.
+ **/
+char *
+stringprep_convert (const char *str,
+		    const char *to_codeset, const char *from_codeset)
+{
+  iconv_t cd;
+  char *dest;
+  char *outp;
+  char *p, *startp;
+  size_t len = strlen (str);
+  size_t inbytes_remaining;
+  size_t outbytes_remaining;
+  size_t outbuf_size;
+  int have_error = 0;
+
+  if (strcmp (to_codeset, from_codeset) == 0)
+    {
+      /* Same codeset: a plain copy is all that is needed.  */
+      char *copy = malloc (len + 1);
+      if (copy)
+	memcpy (copy, str, len + 1);
+      return copy;
+    }
+
+  cd = iconv_open (to_codeset, from_codeset);
+  if (cd == (iconv_t) - 1)
+    return NULL;
+
+  /* iconv() advances its input pointer, so work on a private copy.  */
+  startp = p = malloc (len + 1);
+  inbytes_remaining = len;
+  outbuf_size = len + 1;	/* + 1 for nul in case len == 1 */
+  outbytes_remaining = outbuf_size - 1;	/* -1 for nul */
+  outp = dest = malloc (outbuf_size);
+  if (p == NULL || dest == NULL)
+    {
+      /* Out of memory: release everything, including the descriptor.  */
+      free (startp);
+      free (dest);
+      iconv_close (cd);
+      return NULL;
+    }
+  memcpy (p, str, len + 1);
+
+  for (;;)
+    {
+      size_t used;
+      char *bigger;
+
+      if (iconv (cd, (ICONV_CONST char **) &p, &inbytes_remaining,
+		 &outp, &outbytes_remaining) != (size_t) - 1)
+	break;
+
+      /* Incomplete text is not flagged here; the consumed-length
+         check after the loop turns it into a failure.  */
+      if (errno == EINVAL)
+	break;
+
+      if (errno != E2BIG)
+	{
+	  have_error = 1;	/* EILSEQ or any unexpected failure.  */
+	  break;
+	}
+
+      /* Output buffer is full: double it and retry.  Use a temporary
+         so that dest stays valid (and gets freed) if realloc fails.  */
+      used = outp - dest;
+      outbuf_size *= 2;
+      bigger = realloc (dest, outbuf_size);
+      if (bigger == NULL)
+	{
+	  have_error = 1;
+	  break;
+	}
+      dest = bigger;
+      outp = dest + used;
+      outbytes_remaining = outbuf_size - used - 1;	/* -1 for nul */
+    }
+
+  *outp = '\0';
+
+  /* Success requires that every input byte was consumed.  */
+  if ((size_t) (p - startp) != len)
+    have_error = 1;
+
+  free (startp);
+  iconv_close (cd);
+
+  if (have_error)
+    {
+      free (dest);
+      dest = NULL;
+    }
+
+  return dest;
+}
+
+#else /* HAVE_ICONV */
+
+const char *
+stringprep_locale_charset (void)
+{
+  return "ASCII";		/* Built without iconv: fixed answer.  */
+}
+
+char *
+stringprep_convert (const char *str,
+		    const char *to_codeset, const char *from_codeset)
+{
+  /* No iconv available: warn, then hand back an unconverted copy.  */
+  size_t size = strlen (str) + 1;
+  char *copy;
+
+  fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
+	   "convert data to UTF-8\n");
+  copy = malloc (size);
+  return copy ? memcpy (copy, str, size) : NULL;
+}
+
+#endif /* HAVE_ICONV */
+
+/**
+ * stringprep_locale_to_utf8:
+ * @str: input zero terminated string.
+ *
+ * Transcode @str from the locale's character set into UTF-8, by
+ * means of stringprep_convert().
+ *
+ * Return value: Newly allocated UTF-8 string, or NULL on failure.
+ **/
+char *
+stringprep_locale_to_utf8 (const char *str)
+{
+  const char *locale_charset = stringprep_locale_charset ();
+  return stringprep_convert (str, "UTF-8", locale_charset);
+}
+
+/**
+ * stringprep_utf8_to_locale:
+ * @str: input zero terminated string.
+ *
+ * Transcode @str from UTF-8 into the locale's character set, by
+ * means of stringprep_convert().
+ *
+ * Return value: Newly allocated locale string, or NULL on failure.
+ **/
+char *
+stringprep_utf8_to_locale (const char *str)
+{
+  const char *locale_charset = stringprep_locale_charset ();
+  return stringprep_convert (str, locale_charset, "UTF-8");
+}
diff -Nur t/Versions libidn/Versions
--- t/Versions	1970-01-01 01:00:00.000000000 +0100
+++ libidn/Versions	2003-11-26 22:23:47.000000000 +0100
@@ -0,0 +1,6 @@
+libc {
+  GLIBC_PRIVATE {
+    # Internal libc interface to getaddrinfo
+    idna_to_ascii_lz;
+  }
+}
Follow-Ups:
- Re: RFC: IDN support in getaddrinfo().
  - From: Ulrich Drepper
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]