This is the mail archive of the
binutils@sourceware.cygnus.com
mailing list for the binutils project.
[PATCH] UTF-8 support to windres
- To: binutils at sourceware dot cygnus dot com
- Subject: [PATCH] UTF-8 support to windres
- From: SONE Takeshi <ts1 at cma dot co dot jp>
- Date: Mon, 6 Mar 2000 17:01:47 +0900
Hi,
I fixed multi-byte character handling in windres and added support for
reading and writing .rc files encoded in UTF-8.
I hope this will be included in future releases of binutils.
Regards,
Takeshi
--
SONE Takeshi そね たけし
mailto:ts1@cma.co.jp Office Craftsman Arts
http://www.cma.co.jp/~ts1/
diff -ur binutils-19990818/binutils/windres.c binutils-ts1/binutils/windres.c
--- binutils-19990818/binutils/windres.c Tue Jan 11 07:44:55 2000
+++ binutils-ts1/binutils/windres.c Thu Feb 17 19:09:34 2000
@@ -125,6 +125,7 @@
{"include-dir", required_argument, 0, OPTION_INCLUDE_DIR},
{"input-format", required_argument, 0, 'I'},
{"language", required_argument, 0, OPTION_LANGUAGE},
+ {"utf8", no_argument, 0, 'u'},
{"output-format", required_argument, 0, 'O'},
{"preprocessor", required_argument, 0, OPTION_PREPROCESSOR},
{"target", required_argument, 0, 'F'},
@@ -718,6 +719,7 @@
Define SYM when preprocessing rc file\n\
-v Verbose - tells you what it's doing\n\
--language VAL Set language when reading rc file\n\
+ -u, --utf8 Use UTF-8 encoding in rc file\n\
--use-temp-file Use a temporary file instead of popen to read\n\
the preprocessor output\n\
--no-use-temp-file Use popen (default)\n"));
@@ -810,7 +812,7 @@
language = -1;
use_temp_file = 0;
- while ((c = getopt_long (argc, argv, "i:o:I:O:F:D:v", long_options,
+ while ((c = getopt_long (argc, argv, "i:o:I:O:F:D:vu", long_options,
(int *) 0)) != EOF)
{
switch (c)
@@ -897,6 +899,10 @@
case OPTION_LANGUAGE:
language = strtol (optarg, (char **) NULL, 16);
+ break;
+
+ case 'u':
+ use_utf8 = 1;
break;
case OPTION_USE_TEMP_FILE:
diff -ur binutils-19990818/binutils/winduni.c binutils-ts1/binutils/winduni.c
--- binutils-19990818/binutils/winduni.c Tue Jan 11 07:32:26 2000
+++ binutils-ts1/binutils/winduni.c Mon Feb 21 22:34:02 2000
@@ -27,6 +27,9 @@
hosts, but that seems better than not really supporting unicode at
all. */
+/* UTF-8 support and Win32 fix by SONE Takeshi <ts1@cma.co.jp>,
+ Office Craftsman Arts. */
+
#include "bfd.h"
#include "bucomm.h"
#include "winduni.h"
@@ -37,6 +40,20 @@
#include <windows.h>
#endif
+int use_utf8;
+
+static void
+invalid_utf(void)
+{
+ fatal(_("invalid UTF-8 sequence"));
+}
+
+static void
+too_big_unicode(void)
+{
+ fatal(_("unicode >U+FFFF is not supported"));
+}
+
/* Convert an ASCII string to a unicode string. We just copy it,
expanding chars to shorts, rather than doing something intelligent. */
@@ -47,31 +64,76 @@
const char *ascii;
{
int len;
- const char *s;
+ const unsigned char *s;
unsigned short *w;
- len = strlen (ascii);
+ if (use_utf8)
+ for (s = ascii, len = 0; *s != '\0'; s++, len++)
+ switch ((*s >> 4) & 0xf) {
+ case 0x8: case 0x9: case 0xa: case 0xb:
+ invalid_utf();
+ break;
+ case 0xc: case 0xd:
+ if ((*++s & 0xc0) != 0x80)
+ invalid_utf();
+ break;
+ case 0xe:
+ if ((*++s & 0xc0) != 0x80)
+ invalid_utf();
+ if ((*++s & 0xc0) != 0x80)
+ invalid_utf();
+ break;
+ case 0xf:
+ too_big_unicode();
+ break;
+ }
+ else
+#ifdef _WIN32
+ len = MultiByteToWideChar (CP_ACP, 0, ascii, -1, NULL, 0) - 1;
+#else
+ len = strlen (ascii);
+#endif
if (length != NULL)
*length = len;
*unicode = ((unichar *) res_alloc ((len + 1) * sizeof (unichar)));
+ if (use_utf8)
+ {
+ for (s = ascii, w = *unicode; *s != '\0'; s++, w++)
+ if (*s & 0x80)
+ {
+ if (*s & 0x20)
+ {
+ *w = ((s[0] & 0x0f)<<12) | ((s[1] & 0x3f)<<6) | (s[2] & 0x3f);
+ s += 2;
+ }
+ else
+ {
+ *w = ((s[0] & 0x1f)<<6) | (s[1] & 0x3f);
+ s++;
+ }
+ }
+ else
+ *w = *s & 0xff;
+ *w = 0;
+ }
+ else
+ {
#ifdef _WIN32
- /* FIXME: On Windows, we should be using MultiByteToWideChar to set
- the length. */
- MultiByteToWideChar (CP_ACP, 0, ascii, len + 1, *unicode, len + 1);
+ MultiByteToWideChar (CP_ACP, 0, ascii, -1, *unicode, len + 1);
#else
- for (s = ascii, w = *unicode; *s != '\0'; s++, w++)
- *w = *s & 0xff;
- *w = 0;
+ for (s = ascii, w = *unicode; *s != '\0'; s++, w++)
+ *w = *s & 0xff;
+ *w = 0;
#endif
+ }
}
/* Print the unicode string UNICODE to the file E. LENGTH is the
number of characters to print, or -1 if we should print until the
- end of the string. FIXME: On a Windows host, we should be calling
- some Windows function, probably WideCharToMultiByte. */
+ end of the string. */
void
unicode_print (e, unicode, length)
@@ -82,6 +144,10 @@
while (1)
{
unichar ch;
+#ifdef _WIN32
+ char mbchar[8];
+ int mblen;
+#endif
if (length == 0)
return;
@@ -95,7 +161,7 @@
++unicode;
- if ((ch & 0x7f) == ch)
+ if (!(ch & ~0x7f))
{
if (ch == '\\')
fputs ("\\", e);
@@ -139,9 +205,32 @@
}
}
}
- else if ((ch & 0xff) == ch)
+ else if (use_utf8)
+ {
+ if (ch < 0x800)
+ {
+ putc (((ch>>6) & 0x1f) | 0xc0, e);
+ putc ((ch & 0x3f) | 0x80, e);
+ }
+ else
+ {
+ putc (((ch>>12) & 0x0f) | 0xe0, e);
+ putc (((ch>>6) & 0x3f) | 0x80, e);
+ putc ((ch & 0x3f) | 0x80, e);
+ }
+ }
+#ifdef _WIN32
+ else if ((mblen = WideCharToMultiByte (CP_ACP,
+ 0, &ch, 1, mbchar, sizeof mbchar, NULL, NULL)) != 0)
+ {
+ mbchar[mblen] = '\0';
+ fputs (mbchar, e);
+ }
+#else
+ else if (!(ch & ~0xff))
fprintf (e, "\\%03o", (unsigned int) ch);
else
fprintf (e, "\\x%x", (unsigned int) ch);
+#endif
}
}
diff -ur binutils-19990818/binutils/winduni.h binutils-ts1/binutils/winduni.h
--- binutils-19990818/binutils/winduni.h Tue Jan 11 07:32:26 2000
+++ binutils-ts1/binutils/winduni.h Thu Feb 17 17:34:27 2000
@@ -58,3 +58,7 @@
file, but it hardly seems worth it for one function. */
extern PTR res_alloc PARAMS ((size_t));
+
+/* Nonzero if we convert to/from UTF-8, rather than (incorrect) ASCII. */
+
+extern int use_utf8;