This is the mail archive of the binutils@sourceware.cygnus.com mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH] UTF-8 support to windres


Hi, I fixed multi-byte character handling in windres and added support to read/write .rc files encoded in UTF-8. I hope this will be included in future releases of binutils. Regards, Takeshi -- SONE Takeshi そね たけし mailto:ts1@cma.co.jp Office Craftsman Arts http://www.cma.co.jp/~ts1/ diff -ur binutils-19990818/binutils/windres.c binutils-ts1/binutils/windres.c --- binutils-19990818/binutils/windres.c Tue Jan 11 07:44:55 2000 +++ binutils-ts1/binutils/windres.c Thu Feb 17 19:09:34 2000 @@ -125,6 +125,7 @@ {"include-dir", required_argument, 0, OPTION_INCLUDE_DIR}, {"input-format", required_argument, 0, 'I'}, {"language", required_argument, 0, OPTION_LANGUAGE}, + {"utf8", no_argument, 0, 'u'}, {"output-format", required_argument, 0, 'O'}, {"preprocessor", required_argument, 0, OPTION_PREPROCESSOR}, {"target", required_argument, 0, 'F'}, @@ -718,6 +719,7 @@ Define SYM when preprocessing rc file\n\ -v Verbose - tells you what it's doing\n\ --language VAL Set language when reading rc file\n\ + -u, --utf8 Use UTF-8 encoding in rc file\n\ --use-temp-file Use a temporary file instead of popen to read\n\ the preprocessor output\n\ --no-use-temp-file Use popen (default)\n")); @@ -810,7 +812,7 @@ language = -1; use_temp_file = 0; - while ((c = getopt_long (argc, argv, "i:o:I:O:F:D:v", long_options, + while ((c = getopt_long (argc, argv, "i:o:I:O:F:D:vu", long_options, (int *) 0)) != EOF) { switch (c) @@ -897,6 +899,10 @@ case OPTION_LANGUAGE: language = strtol (optarg, (char **) NULL, 16); + break; + + case 'u': + use_utf8 = 1; break; case OPTION_USE_TEMP_FILE: diff -ur binutils-19990818/binutils/winduni.c binutils-ts1/binutils/winduni.c --- binutils-19990818/binutils/winduni.c Tue Jan 11 07:32:26 2000 +++ binutils-ts1/binutils/winduni.c Mon Feb 21 22:34:02 2000 @@ -27,6 +27,9 @@ hosts, but that seems better than not really supporting unicode at all. */ +/* UTF-8 support and Win32 fix by SONE Takeshi <ts1@cma.co.jp>, + Office Craftsman Arts. */ + #include "bfd.h" #include "bucomm.h" #include "winduni.h" @@ -37,6 +40,20 @@ #include <windows.h> #endif +int use_utf8; + +static void +invalid_utf(void) +{ + fatal(_("invalid UTF-8 sequence")); +} + +static void +too_big_unicode(void) +{ + fatal(_("unicode >U+FFFF is not supported")); +} + /* Convert an ASCII string to a unicode string. We just copy it, expanding chars to shorts, rather than doing something intelligent. */ @@ -47,31 +64,76 @@ const char *ascii; { int len; - const char *s; + const unsigned char *s; unsigned short *w; - len = strlen (ascii); + if (use_utf8) + for (s = ascii, len = 0; *s != '\0'; s++, len++) + switch ((*s >> 4) & 0xf) { + case 0x8: case 0x9: case 0xa: case 0xb: + invalid_utf(); + break; + case 0xc: case 0xd: + if ((*++s & 0xc0) != 0x80) + invalid_utf(); + break; + case 0xe: + if ((*++s & 0xc0) != 0x80) + invalid_utf(); + if ((*++s & 0xc0) != 0x80) + invalid_utf(); + break; + case 0xf: + too_big_unicode(); + break; + } + else +#ifdef _WIN32 + len = MultiByteToWideChar (CP_ACP, 0, ascii, -1, NULL, 0) - 1; +#else + len = strlen (ascii); +#endif if (length != NULL) *length = len; *unicode = ((unichar *) res_alloc ((len + 1) * sizeof (unichar))); + if (use_utf8) + { + for (s = ascii, w = *unicode; *s != '\0'; s++, w++) + if (*s & 0x80) + { + if (*s & 0x20) + { + *w = ((s[0] & 0x0f)<<12) | ((s[1] & 0x3f)<<6) | (s[2] & 0x3f); + s += 2; + } + else + { + *w = ((s[0] & 0x1f)<<6) | (s[1] & 0x3f); + s++; + } + } + else + *w = *s & 0xff; + *w = 0; + } + else + { #ifdef _WIN32 - /* FIXME: On Windows, we should be using MultiByteToWideChar to set - the length. */ - MultiByteToWideChar (CP_ACP, 0, ascii, len + 1, *unicode, len + 1); + MultiByteToWideChar (CP_ACP, 0, ascii, -1, *unicode, len + 1); #else - for (s = ascii, w = *unicode; *s != '\0'; s++, w++) - *w = *s & 0xff; - *w = 0; + for (s = ascii, w = *unicode; *s != '\0'; s++, w++) + *w = *s & 0xff; + *w = 0; #endif + } } /* Print the unicode string UNICODE to the file E. LENGTH is the number of characters to print, or -1 if we should print until the - end of the string. FIXME: On a Windows host, we should be calling - some Windows function, probably WideCharToMultiByte. */ + end of the string. */ void unicode_print (e, unicode, length) @@ -82,6 +144,10 @@ while (1) { unichar ch; +#ifdef _WIN32 + char mbchar[8]; + int mblen; +#endif if (length == 0) return; @@ -95,7 +161,7 @@ ++unicode; - if ((ch & 0x7f) == ch) + if (!(ch & ~0x7f)) { if (ch == '\\') fputs ("\\", e); @@ -139,9 +205,32 @@ } } } - else if ((ch & 0xff) == ch) + else if (use_utf8) + { + if (ch < 0x800) + { + putc (((ch>>6) & 0x1f) | 0xc0, e); + putc ((ch & 0x3f) | 0x80, e); + } + else + { + putc (((ch>>12) & 0x0f) | 0xe0, e); + putc (((ch>>6) & 0x3f) | 0x80, e); + putc ((ch & 0x3f) | 0x80, e); + } + } +#ifdef _WIN32 + else if ((mblen = WideCharToMultiByte (CP_ACP, + 0, &ch, 1, mbchar, sizeof mbchar, NULL, NULL)) != 0) + { + mbchar[mblen] = '\0'; + fputs (mbchar, e); + } +#else + else if (!(ch & ~0xff)) fprintf (e, "\\%03o", (unsigned int) ch); else fprintf (e, "\\x%x", (unsigned int) ch); +#endif } } diff -ur binutils-19990818/binutils/winduni.h binutils-ts1/binutils/winduni.h --- binutils-19990818/binutils/winduni.h Tue Jan 11 07:32:26 2000 +++ binutils-ts1/binutils/winduni.h Thu Feb 17 17:34:27 2000 @@ -58,3 +58,7 @@ file, but it hardly seems worth it for one function. */ extern PTR res_alloc PARAMS ((size_t)); + +/* Nonzero if we convert to/from UTF-8, rather than (incorrect) ASCII. */ + +extern int use_utf8;
Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]