This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Here's the current version of my script. It updates most, but not all, fields. The format ones are pretty hard to extract, and the diff to the current glibc db is significant. I'll post this as a proper patch once master opens for 2.24 and I can land the pending locale updates. -mike
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Written by Mike Frysinger <vapier@gentoo.org> for much great glory.

"""Helper tool for importing current CLDR data.

See http://cldr.unicode.org/ for more details.
"""

# TODO: Need to handle copy directives better so we can see when a value
# has changed for a specific locale, but it's copying the (wrong) values
# from others.
# TODO: Add missing fields.
# TODO: Add support for updating locale/iso-3166.def via supplementalData.xml.
# TODO: Add support for updating locale/iso-4217.def.

from __future__ import print_function

import argparse
import datetime
import errno
import io
import logging
import os
import re
import subprocess
import sys
import time
from xml.etree import ElementTree


# Where to store CLDR/etc... data files we fetch.
DEFAULT_WORKING_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'cldr-%(version)s')

# Whether we should clean up newlines/comments.
REWRITE_STYLE = False

# Python 2 needs unichr() for code points above 255; Python 3 only has chr().
try:
    _unichr = unichr  # pylint: disable=invalid-name,undefined-variable
except NameError:
    _unichr = chr  # pylint: disable=invalid-name


def u_encode(text):
    """Convert unicode |text| to <U####> format."""
    return ''.join('<U%04X>' % ord(x) for x in text)


_U_MATCH = re.compile(r'<U([0-9A-Fa-f]+)>')


def u_decode(text):
    """Convert every <U####> sequence in |text| back to its character."""
    return _U_MATCH.sub(lambda m: _unichr(int(m.group(1), 16)), text)


def _int_or_none(value):
    """Return int(|value|), passing None through so missing data is skipped."""
    return None if value is None else int(value)


def get_parser():
    """Return an argument parser for this module."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--working-dir', default=DEFAULT_WORKING_DIR,
                        help='Where to download files (default: %(default)s)')
    parser.add_argument('-v', '--version', default=Cldr.CURR_VERSION,
                        help='Version of CLDR to use (default: %(default)s)')
    parser.add_argument('-d', '--debug', default=False, action='store_true',
                        help='Enable debug logging')
    parser.add_argument('locales', nargs='*', help='Locales to generate')
    return parser


def logging_init(debug=False):
    """Set up the logging module.

    Args:
      debug: When true, log at DEBUG level; otherwise log at INFO level.
    """
    fmt = '%(asctime)s: %(levelname)-7s: '
    fmt += '%(message)s'

    # 'Sat, 05 Oct 2013 18:58:50 -0400 (EST)'
    tzname = time.strftime('%Z', time.localtime())
    datefmt = '%a, %d %b %Y %H:%M:%S ' + tzname

    level = logging.DEBUG if debug else logging.INFO

    handler = logging.StreamHandler(stream=sys.stdout)
    formatter = logging.Formatter(fmt, datefmt)
    handler.setFormatter(formatter)

    logger = logging.getLogger()
    logger.addHandler(handler)
    logger.setLevel(level)


class cached_property(object):  # pylint: disable=invalid-name
    """Like @property, but the computed value is cached on the instance.

    This is a non-data descriptor: the first access stores the result in the
    instance __dict__ under the function's name, so later accesses never reach
    __get__ again.
    """

    def __init__(self, func):
        self.func = func
        self.__doc__ = getattr(func, '__doc__', None)

    def __get__(self, instance, _owner):
        if instance is None:
            return self
        value = instance.__dict__[self.func.__name__] = self.func(instance)
        return value


class Iso639(object):
    """Content for the ISO-639 database."""

    # Link to upstream ISO-639-2 database.
    ISO639_2_URI = 'http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt'

    # Path to our local copy of the ISO-639 database.
    PATH = os.path.join(os.path.dirname(os.path.dirname(
        os.path.realpath(__file__))), 'locale', 'iso-639.def')

    # Regex to process our local ISO-639 def file.  Named groups are used so
    # the two macro forms cannot be confused by positional group numbering.
    _LINE_MATCH = re.compile(
        r'^(?:DEFINE_LANGUAGE_CODE \("(?P<name>[^"]*)", (?P<code>[^,]*), '
        r'(?P<term>[^,]*), (?P<bib>[^,]*)\)'
        r'|DEFINE_LANGUAGE_CODE3 \("(?P<name3>[^"]*)", (?P<term3>[^,]*), '
        r'(?P<bib3>[^,]*)\))$')

    def __init__(self, path=None):
        """Load the def file at |path| (defaults to the in-tree PATH).

        self.db maps a language code to (english name, 639-2/T, 639-2/B).
        Two-letter entries are keyed by their two-letter code; entries that
        only have three-letter codes are keyed by the terminology code.
        """
        self.db = {}
        with open(self.PATH if path is None else path) as fp:
            for line in fp:
                m = self._LINE_MATCH.match(line)
                if m is None:
                    continue
                if m.group('code') is not None:
                    # DEFINE_LANGUAGE_CODE form.
                    self.db[m.group('code')] = (
                        m.group('name'), m.group('term'), m.group('bib'))
                else:
                    # DEFINE_LANGUAGE_CODE3 form.
                    self.db[m.group('term3')] = (
                        m.group('name3'), m.group('term3'), m.group('bib3'))

    def get_term(self, lang):
        """Return the ISO 639-2/T (Terminology) code, or None if unknown."""
        entry = self.db.get(lang, ())
        if len(entry) == 3:
            return entry[1]
        return None

    def get_bib(self, lang):
        """Return the ISO 639-2/B (Bibliographic) code, or None if unknown."""
        entry = self.db.get(lang, ())
        if len(entry) == 3:
            return entry[2]
        return None

    def _download_uri(self, path):
        """Download the ISO-639-2 db into |path| (if needed) and load it."""
        iso639 = os.path.join(path, os.path.basename(self.ISO639_2_URI))
        if not os.path.exists(iso639):
            subprocess.check_call(['wget', '-O', iso639, self.ISO639_2_URI])
        return self._load_iso639(iso639)

    @staticmethod
    def _load_iso639(db):
        """Load the ISO-639-2 database stored in the file |db|.

        http://www.loc.gov/standards/iso639-2/ascii_8bits.html
        An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when
        given), an alpha-2 code (when given), an English name, and a French
        name of a language are all separated by pipe (|) characters.

        Returns:
          A dict mapping each alpha-2 code to its (bibliographic,
          terminologic) pair.
        """
        codes = {}
        # utf-8-sig transparently drops a leading byte-order mark if present.
        with io.open(db, encoding='utf-8-sig') as fp:
            for line in fp:
                line = line.rstrip()
                if not line:
                    continue
                bcode, tcode, code, _en, _fr = line.split('|')
                if code:
                    codes[code] = (bcode, tcode)
        return codes


class CldrLocale(object):
    """Content for a single locale in the cldr database."""

    _DAY_KEYS = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')

    def __init__(self, cldr, locale, iso639):
        self.cldr = cldr
        self.locale = locale
        self.locale_root = cldr.load_lang(locale)
        self.lang_root = cldr.load_lang(self.lang)
        self.iso639 = iso639

    def _territory_value(self, elements, attr, territories_attr='territories',
                         use_world=True):
        """Pick the |attr| value that applies to our territory.

        Args:
          elements: Iterable of XML elements, each listing territories.
          attr: Name of the attribute holding the value to return.
          territories_attr: Attribute holding the space separated territories.
          use_world: Whether the world region (001) supplies the fallback.

        Returns:
          The value of the last element that names our territory explicitly;
          else the first world (001) value when |use_world|; else None.
        """
        result = None
        for element in elements:
            territories = (element.get(territories_attr) or '').split()
            if use_world and result is None and '001' in territories:
                result = element.get(attr)
            if self.territory in territories:
                result = element.get(attr)
        return result

    @cached_property
    def lang(self):
        """The locale's short language code."""
        root = self.locale_root.find('identity/language')
        return root.get('type')

    @cached_property
    def territory(self):
        """The locale's short territory code."""
        root = self.locale_root.find('identity/territory')
        return root.get('type')

    @cached_property
    def en_lang(self):
        """The name of the language in English."""
        root = self.cldr.load_lang('en')
        names = root.find('localeDisplayNames')
        # First see if the locale has a name before we fall back to the lang.
        langs_root = names.find('languages')
        lang_root = langs_root.find('language[@type="%s"]' % self.locale)
        if lang_root is None:
            lang_root = langs_root.find('language[@type="%s"]' % self.lang)
        return lang_root.findtext('.')

    @cached_property
    def en_territory(self):
        """The name of the territory in English."""
        root = self.cldr.load_lang('en')
        names = root.find('localeDisplayNames')
        return names.find('territories/territory[@type="%s"]' %
                          self.territory).findtext('.')

    @cached_property
    def country_ab2(self):
        """Two-letter ISO-3166 country code."""
        # TODO: Implement this.

    @cached_property
    def country_ab3(self):
        """Three-letter ISO-3166 country code."""
        # TODO: Implement this.

    @cached_property
    def lang_name(self):
        """The localized name for the language."""
        for root in (self.locale_root, self.lang_root):
            lang_root = root.find('localeDisplayNames/languages/'
                                  'language[@type="%s"]' % self.lang)
            if lang_root is not None:
                return lang_root.findtext('.')
        return None

    @cached_property
    def lang_term(self):
        """Three-letter ISO 639-2/T (Terminology) code"""
        return self.iso639.get_term(self.lang)

    @cached_property
    def lang_lib(self):
        """Three-letter ISO 639-2/B (Bibliographic) code"""
        return self.iso639.get_bib(self.lang)

    @cached_property
    def country_name(self):
        """The localized name for the territory."""
        for root in (self.locale_root, self.lang_root):
            names = root.find('localeDisplayNames')
            if names is not None:
                name = names.find('territories/territory[@type="%s"]' %
                                  self.territory)
                if name is not None:
                    return name.findtext('.')
        return None

    def _territory_codes(self):
        """Return the codeMappings element for our territory (or None)."""
        root = self.cldr.load_supp('supplementalData')
        return root.find('codeMappings/territoryCodes[@type="%s"]' %
                         self.territory)

    @cached_property
    def country_num(self):
        """ISO 3166-1 numeric code"""
        codes = self._territory_codes()
        return None if codes is None else codes.get('numeric')

    @cached_property
    def country_term(self):
        """ISO 3166-1 alpha-3 code"""
        codes = self._territory_codes()
        alpha3 = None if codes is None else codes.get('alpha3')
        return None if alpha3 is None else alpha3.lower()

    @cached_property
    def tel_int_fmt(self):
        """Telephone format for international calling."""
        # TODO: Implement this.

    @cached_property
    def tel_dom_fmt(self):
        """Telephone format for domestic calling."""
        # TODO: Implement this.

    @cached_property
    def int_select(self):
        """Telephone prefix for calling international numbers."""
        # TODO: Implement this.

    @cached_property
    def int_prefix(self):
        """Telephone international country code prefix."""
        root = self.cldr.load_supp('telephoneCodeData')
        code = root.find('telephoneCodeData/codesByTerritory[@territory="%s"]'
                         '/telephoneCountryCode' % self.territory)
        return None if code is None else code.get('code')

    @cached_property
    def int_curr_symbol(self):
        """Need to rectify w/locale/iso-4217.def."""
        # The xmlpath support in python is not complete, so we need to search
        # for the currency w/missing @to attribute ourselves.
        root = self.cldr.load_supp('supplementalData')
        currencies = root.find('currencyData/region[@iso3166="%s"]' %
                               self.territory)
        # Iterate the element directly; Element.getchildren() no longer
        # exists in current Python releases.
        for currency in (() if currencies is None else currencies):
            if 'to' not in currency.keys():
                return currency.get('iso4217')
        raise ValueError('Could not find a currency for %s' % self.territory)

    @cached_property
    def currency_symbol(self):
        """Need to rectify w/locale/iso-4217.def."""
        # First search the locale, then the lang dbs.
        for root in (self.locale_root, self.lang_root):
            numbers_root = root.find('numbers')
            if numbers_root is None:
                continue

            symbol_ele = numbers_root.find('currencies/currency[@type="%s"]'
                                           '/symbol' % self.int_curr_symbol)
            if symbol_ele is not None:
                return symbol_ele.findtext('.')

        # Try the common currency database.
        chars_root = self.cldr.load_supp('characters')
        fallbacks = chars_root.find('characters/character-fallback')
        for symbol_ele in (() if fallbacks is None else fallbacks):
            if symbol_ele.findtext('substitute') == self.int_curr_symbol:
                return symbol_ele.get('value')

        # A few symbols have no translation.
        return self.int_curr_symbol

    @cached_property
    def number_system(self):
        """Get the <symbols> element of the active number system (or None)."""
        for root in (self.locale_root, self.lang_root):
            numbers_root = root.find('numbers')
            if numbers_root is None:
                continue

            # If there's a default labeled, use it.  Otherwise just go with
            # the first one found.  It should be the only one.  The default is
            # declared inside <numbers>, so look it up there.
            num_sys_ele = numbers_root.find('defaultNumberingSystem')
            if num_sys_ele is None:
                symbols = numbers_root.find('symbols')
            else:
                symbols = numbers_root.find('symbols[@numberSystem="%s"]' %
                                            num_sys_ele.findtext('.'))
            if symbols is not None:
                return symbols
        return None

    @cached_property
    def decimal_point(self):
        """The symbol used to denote decimal points."""
        symbols = self.number_system
        return None if symbols is None else symbols.findtext('decimal')

    @cached_property
    def thousands_sep(self):
        """The symbol used to group thousands digits."""
        symbols = self.number_system
        return None if symbols is None else symbols.findtext('group')

    @cached_property
    def grouping(self):
        """Digit grouping sizes."""
        # TODO: Implement this.

    def _lookup_day_mon(self, cal_field, cal_type, cal_idxs):
        """Look up various calendar fields.

        Args:
          cal_field: The CLDR element family ('day', 'month', 'dayPeriod').
          cal_type: The width to fetch (e.g. 'abbreviated' or 'wide').
          cal_idxs: The @type values to fetch, in output order.

        Returns:
          The list of localized strings from the first context that has every
          requested entry, or None when no complete set exists.
        """
        for root in (self.locale_root, self.lang_root):
            # XXX: Look up type in calendarPreference ?
            calendar_root = root.find(
                'dates/calendars/calendar[@type="gregorian"]')
            if calendar_root is None:
                continue

            for key in ('stand-alone', 'format', 'narrow'):
                ctx_root = calendar_root.find('%ss/%sContext[@type="%s"]' %
                                              (cal_field, cal_field, key))
                if ctx_root is None:
                    continue

                dm_root = ctx_root.find('%sWidth[@type="%s"]' %
                                        (cal_field, cal_type))
                if dm_root is None:
                    continue

                ret = [dm_root.find('%s[@type="%s"]' % (cal_field, x))
                       for x in cal_idxs]
                if None not in ret:
                    return [x.findtext('.') for x in ret]
        return None

    def _lookup_day(self, width_type):
        """Internal helper for abday/day lookups."""
        return self._lookup_day_mon('day', width_type, self._DAY_KEYS)

    def _lookup_mon(self, width_type):
        """Internal helper for abmon/mon lookups."""
        return self._lookup_day_mon('month', width_type, range(1, 13))

    @cached_property
    def abday(self):
        """Abbreviated localized names for the days of the week."""
        return self._lookup_day('abbreviated')

    @cached_property
    def day(self):
        """Full localized names for the days of the week."""
        return self._lookup_day('wide')

    @cached_property
    def abmon(self):
        """Abbreviated localized names for the months."""
        return self._lookup_mon('abbreviated')

    @cached_property
    def mon(self):
        """Full localized names for the months."""
        return self._lookup_mon('wide')

    # http://www.unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
    # The percent signs are doubled because the values are later run through
    # a %-format expansion in Locale.update_cldr.
    _CLDR_TO_POSIX_FMT = {
        # year
        'y': '%%y',
        'yy': '%%y',
        'yyyy': '%%Y',
        # month
        'M': '%%m',
        'MM': '%%m',
        'MMM': '%%b',
        'MMMM': '%%B',
        # day
        'd': '%%d',
        'dd': '%%d',
        # period
        'a': '%%p',
        # hour
        'h': '%%I',
        'hh': '%%I',
        'H': '%%H',
        'HH': '%%H',
        # minute
        'm': '%%M',
        'mm': '%%M',
        # second
        's': '%%S',
        'ss': '%%S',
    }

    # Longest keys first so the match never depends on dict ordering.
    _CLDR_FMT_MATCH = re.compile(
        r'\b(' +
        '|'.join(sorted(_CLDR_TO_POSIX_FMT, key=len, reverse=True)) +
        r')\b')

    @classmethod
    def _to_posix_fmt(cls, fmt):
        """Convert the CLDR notation to what POSIX uses (None passes through)."""
        if fmt is None:
            return None
        return cls._CLDR_FMT_MATCH.sub(
            lambda m: cls._CLDR_TO_POSIX_FMT[m.group(1)], fmt)

    @cached_property
    def hours_format(self):
        """Return 24 or 12 depending on preferred %H or %h format"""
        root = self.cldr.load_supp('supplementalData')
        datasets = root.find('timeData')
        if datasets is None:
            return None
        # The world (001) default is deliberately ignored: the "allowed"
        # field makes it tricky to apply blindly.
        pref = self._territory_value(datasets.findall('hours'), 'preferred',
                                     territories_attr='regions',
                                     use_world=False)
        if pref == 'H':
            return '24'
        elif pref == 'h':
            return '12'
        elif pref is None:
            return None
        else:
            raise ValueError('Unknown hour value: %s' % pref)

    @cached_property
    def am_pm(self):
        """Localized AM/PM time fields when 12 hour clocks are used."""
        if self.hours_format == '24':
            return ['', '']
        elif self.hours_format is None:
            return None
        return self._lookup_day_mon('dayPeriod', 'abbreviated', ('am', 'pm'))

    def _lookup_d_t_fmt(self, dt, dt_type='medium'):
        """Internal helper for various fmt lookups.

        Returns the raw CLDR pattern for the |dt| ('date', 'time' or
        'dateTime') format of length |dt_type|, or None if not found.
        """
        for root in (self.locale_root, self.lang_root):
            # XXX: Look up type in calendarPreference ?
            calendar_root = root.find(
                'dates/calendars/calendar[@type="gregorian"]')
            if calendar_root is None:
                continue
            fmts = calendar_root.find('%sFormats/%sFormatLength[@type="%s"]'
                                      '/%sFormat/pattern' %
                                      (dt, dt, dt_type, dt))
            if fmts is not None:
                return fmts.findtext('.')
        return None

    @cached_property
    def d_t_fmt(self):
        """Appropriate date and time representation (%c)

        Example:
        $ date +'%a %d %b %Y %r %Z'
        Tue 09 Feb 2016 06:39:48 PM EST
        """
        pattern = self._lookup_d_t_fmt('dateTime')
        if pattern is None or self._t_fmt is None or self._d_fmt is None:
            return None
        # In CLDR, {0} is the time pattern and {1} is the date pattern.
        return self._to_posix_fmt(
            pattern.replace('{0}', self._t_fmt).replace('{1}', self._d_fmt))

    @cached_property
    def _d_fmt(self):
        """Internal helper for the raw d_fmt field."""
        return self._lookup_d_t_fmt('date')

    @cached_property
    def d_fmt(self):
        """Appropriate date representation (%x)

        Example:
        $ date +'%m/%d/%Y'
        02/09/2016
        """
        return self._to_posix_fmt(self._d_fmt)

    @cached_property
    def _t_fmt(self):
        """Internal helper for the raw t_fmt field."""
        return self._lookup_d_t_fmt('time')

    @cached_property
    def t_fmt(self):
        """Appropriate time representation (%X)

        Example:
        $ date +%r
        06:41:21 PM
        """
        return self._to_posix_fmt(self._t_fmt)

    @cached_property
    def t_fmt_ampm(self):
        """Appropriate AM/PM time representation (%r)

        Example:
        $ date +'%I:%M:%S %p'
        06:41:21 PM
        """
        if self.hours_format == '24':
            return ''
        # TODO: Implement this for 12 hour locales.
        return None

    @cached_property
    def date_fmt(self):
        """Appropriate date representation (date(1))

        $ date +'%a %b %e %H:%M:%S %Z %Y'
        Tue Feb  9 06:39:48 EST 2016
        """
        # TODO: Implement this.

    @cached_property
    def week(self):
        """DAYSINWEEK;WEEKSTARTDATE;MINWEEKLEN field"""
        # TODO: Implement this.

    @cached_property
    def first_weekday(self):
        """Number of day in the week for the first column in the calendar."""
        root = self.cldr.load_supp('supplementalData')
        data = root.find('weekData')
        if data is None:
            return None
        # Throw out the alternative entries; we only want the primary data.
        starts = [x for x in data.findall('firstDay')
                  if x.get('alt') is None]
        first = self._territory_value(starts, 'day')
        if first is None:
            return None
        return self._DAY_KEYS.index(first) + 1

    @cached_property
    def first_workday(self):
        """Number of day in the week for the first working day."""
        root = self.cldr.load_supp('supplementalData')
        data = root.find('weekData')
        if data is None:
            return None
        weekend_end = self._territory_value(data.findall('weekendEnd'), 'day')
        if weekend_end is None:
            return None
        # weekendEnd is the last day of the weekend, so the first working day
        # is the day after it (wrapping from Saturday back to Sunday).
        return (self._DAY_KEYS.index(weekend_end) + 1) % 7 + 1

    @cached_property
    def measurement(self):
        """Return 1 for metric and 2 for imperial"""
        root = self.cldr.load_supp('supplementalData')
        # Throw out the systems we don't care about.
        systems = [x for x in root.findall('measurementData/measurementSystem')
                   if x.get('category') != 'temperature' and
                   x.get('type') != 'UK']
        measurement = self._territory_value(systems, 'type')

        # We don't use imperial settings for Myanmar even though CLDR does.
        # https://en.wikipedia.org/wiki/Myanmar_units_of_measurement
        if self.territory == 'MM':
            if measurement == 'US':
                measurement = 'metric'
            else:
                raise ValueError('CLDR is updated; drop this hack')

        if measurement == 'metric':
            return '1'
        elif measurement == 'US':
            return '2'
        else:
            raise ValueError('Do not understand type %s' % measurement)

    @cached_property
    def measurement_copy(self):
        """We copy other locales for most"""
        if self.locale in ('en_US', 'i18n'):
            return None
        elif self.measurement == '1':
            return 'i18n'
        elif self.measurement == '2':
            return 'en_US'
        else:
            raise ValueError('Unknown measurement %s' % self.measurement)

    @cached_property
    def paper(self):
        """Return the paper type"""
        root = self.cldr.load_supp('supplementalData')
        return self._territory_value(
            root.findall('measurementData/paperSize'), 'type')

    @cached_property
    def paper_height(self):
        """Return the height of paper (in mm)"""
        return {'A4': '297', 'US-Letter': '279'}.get(self.paper)

    @cached_property
    def paper_width(self):
        """Return the width of paper (in mm)"""
        return {'A4': '210', 'US-Letter': '216'}.get(self.paper)

    @cached_property
    def paper_copy(self):
        """We copy other locales for most"""
        if self.locale in ('en_US', 'i18n'):
            return None
        elif self.paper == 'A4':
            return 'i18n'
        elif self.paper == 'US-Letter':
            return 'en_US'
        else:
            raise ValueError('Unknown paper %s' % self.paper)


class Cldr(object):
    """Content for the cldr database."""

    # The current release version that we use.
    CURR_VERSION = '28'

    # Where to find the CLDR data.
    URI = 'http://unicode.org/Public/cldr/%(version)s/core.zip'

    def __init__(self, path, version):
        """Set up the working dir.

        Args:
          path: Working dir; may contain a %(version)s placeholder.
          version: The CLDR release to use.
        """
        fields = {'version': version}
        self.dir = path % fields
        self.uri = self.URI % fields
        self.version = version
        self.date = None
        self.main_dbs = {}
        self.supp_dbs = {}
        self.iso639 = Iso639()

        # Set up the working dir.
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    def download(self):
        """Download the current cldr database."""
        # Download the CLDR data.
        archive = os.path.join(self.dir, 'core.zip')
        if not os.path.exists(archive):
            subprocess.check_call(['wget', '-O', archive, self.uri])
        self.date = datetime.datetime.fromtimestamp(os.path.getmtime(archive))

        # Unpack the CLDR data.
        common_dir = os.path.join(self.dir, 'common')
        if not os.path.exists(common_dir):
            subprocess.check_call(['unzip', '-u', 'core.zip'], cwd=self.dir)

    def _load_db(self, db, subdir, cache):
        """Load the database |db| out of |subdir| using |cache|."""
        if db not in cache:
            db_path = os.path.join(self.dir, 'common', subdir, '%s.xml' % db)
            tree = ElementTree.parse(db_path)
            cache[db] = tree.getroot()
        return cache[db]

    def _load_main(self, db):
        """Load database |db| from the main repo."""
        return self._load_db(db, 'main', self.main_dbs)

    def load_lang(self, lang):
        """Load the language |lang| database."""
        return self._load_main(lang)

    def load_supp(self, db):
        """Load database |db| from the supplemental repo."""
        return self._load_db(db, 'supplemental', self.supp_dbs)

    def locale(self, locale):
        """Get an object for a specific cldr |locale|."""
        return CldrLocale(self, locale, self.iso639)


class LocaleError(Exception):
    """Error w/Locale objects"""


class LocaleCategory(object):
    """Content for a single locale category."""

    def __init__(self, name='', content=(), header=()):
        self.name = name.lower()
        self.content = content
        self.header = header

    def __str__(self):
        padding = '\n' if REWRITE_STYLE else ''
        ret = ''
        if self.header:
            ret += padding + '\n'.join(self.header) + '\n'
        lc_name = self.name.upper()
        # list() so the default (tuple) content can be concatenated too.
        ret += (padding +
                '\n'.join([lc_name] + list(self.content) +
                          ['END %s' % lc_name]) +
                '\n')
        return ret


class Locale(object):
    """Content for a locale file itself."""

    def __init__(self, name=None, path=None):
        self.name = name
        self.header = []
        self.lc_identification = []
        self.lc_ctype = []
        self.lc_collate = []
        self.lc_time = []
        self.lc_numeric = []
        self.lc_monetary = []
        self.lc_messages = []
        self.lc_paper = []
        self.lc_name = []
        self.lc_address = []
        self.lc_telephone = []
        self.lc_measurement = []
        self.categories = []
        self.cldr = None
        if path is not None:
            self.read(path)

    @staticmethod
    def _trim_extra_lines(lines, leading=True, trailing=True,
                          consecutive=True, comments=False):
        """Helper to clean up the style of the data files.

        Modifies |lines| in place (and returns it).  This is a no-op unless
        REWRITE_STYLE is enabled.
        """
        if not REWRITE_STYLE:
            return lines

        # Clear leading blank lines.
        if leading:
            while lines and not lines[0]:
                lines.pop(0)

        # Clear trailing blank lines.
        if trailing:
            while lines and not lines[-1]:
                lines.pop(-1)

        # Clear consecutive blank lines.
        if consecutive:
            i = 0
            while i < len(lines) - 1:
                if not lines[i] and not lines[i + 1]:
                    lines.pop(i)
                else:
                    i += 1

        # Trim blank comment lines that start/end a section.
        if comments:
            i = 0
            while i < len(lines):
                if (lines[i] == '%' and
                        (i == 0 or
                         not lines[i - 1] or
                         lines[i - 1][0] != '%')):
                    lines.pop(i)
                elif (lines[i] == '%' and
                      (i == len(lines) - 1 or
                       not lines[i + 1] or
                       lines[i + 1][0] != '%')):
                    lines.pop(i)
                else:
                    i += 1

        return lines

    def readfp(self, fp):
        """Load the locale content from |fp|

        Raises:
          IndexError: The file is too short to hold the two header lines.
          LocaleError: The header or a category marker is malformed.
        """
        lines = [x.rstrip() for x in fp.readlines()]
        self._trim_extra_lines(lines)

        # Process the leading few lines.  The two directives may come in
        # either order, so normalize before validating them.
        comment_line = lines.pop(0)
        escape_line = lines.pop(0)
        self.header = [comment_line, escape_line]
        if escape_line.startswith('comment_char'):
            escape_line, comment_line = comment_line, escape_line

        line = comment_line
        if line.startswith('comment_char'):
            if line.split()[1] != '%':
                raise LocaleError('Bad comment_char: %s' % line)
        else:
            raise LocaleError('Second line should be comment_char, not %s' %
                              line)

        line = escape_line
        if line.startswith('escape_char'):
            if line.split()[1] != '/':
                raise LocaleError('Bad escape_char: %s' % line)
        else:
            raise LocaleError('First line should be escape_char, not %s' %
                              line)

        # Now walk each locale category.
        while lines:
            # Extract any leading comments.
            header = []
            while lines:
                line = lines[0]
                if line.startswith('LC_'):
                    break
                elif not line or line[0] == '%':
                    header.append(line)
                    lines.pop(0)
                    continue
                else:
                    break
            self._trim_extra_lines(header)

            if not lines:
                if header:
                    print('Throwing away trailing lines: %r' % header,
                          file=sys.stderr)
                return

            line = lines.pop(0)
            if line[0:3] != 'LC_':
                raise LocaleError('Bad line state: %s' % line)
            cat = line.lower()

            cat_lines = []
            while lines:
                line = lines.pop(0)
                if line == 'END %s' % cat.upper():
                    break
                cat_lines.append(line)
            self._trim_extra_lines(cat_lines)

            lc = LocaleCategory(name=cat, content=cat_lines, header=header)
            setattr(self, cat, lc)
            self.categories.append(cat)

    def read(self, path):
        """Load the locale file from |path|"""
        with open(path) as fp:
            self.readfp(fp)

    def writefp(self, fp):
        """Write the locale content to |fp|"""
        if REWRITE_STYLE:
            header = ['comment_char %', 'escape_char /']
        else:
            header = self.header
        fp.write('\n'.join(header) + '\n')
        for category in self.categories:
            lc = getattr(self, category)
            fp.write(str(lc))

    def write(self, path):
        """Write the locale content to |path|"""
        # Close the file before returning: callers rename it right away.
        with open(path, 'w') as fp:
            self.writefp(fp)

    def update_cldr(self, cldr):
        """Merge CLDR updates in to this locale.

        Locales that CLDR has no data file for are silently left alone.
        """
        try:
            cldr_locale = cldr.locale(self.name)
        except EnvironmentError as e:
            # EnvironmentError covers IOError (py2) as well as OSError.
            if e.errno == errno.ENOENT:
                return
            raise

        # Start updating the actual data.
        cldr_values = {
            'generator': os.path.basename(__file__),
            'english_lang_name': cldr_locale.en_lang,
            'english_territory_name': cldr_locale.en_territory,
            'source_name': 'Unicode Common Locale Data Repository (CLDR)',
            'source_version': cldr.version,
            'source_uri': cldr.uri.replace('/', '//'),
            'source_date': cldr.date.strftime('%Y-%m-%d'),
            'lang': cldr_locale.lang,
            'territory': cldr_locale.territory,
            'locale': cldr_locale.locale,
        }

        # Fields mapped to None are left untouched in the locale file.
        all_values = {}
        all_values['lc_identification'] = {
            #'title': ('%(english_lang_name)s language locale for '
            #          '%(english_territory_name)s'),
            #'source': '%(source_name)s',
            #'address': '%(source_uri)s',
            #'contact': 'http:////cldr.unicode.org//index//process',
            #'email': 'bug-glibc-locales@gnu.org',
            'tel': '',
            'fax': '',
            'language': '%(english_lang_name)s',
            'territory': '%(english_territory_name)s',
            #'revision': '%(source_version)s',
            #'date': '%(source_date)s',
        }
        # These are based on the charset, not the locale.
        all_values['lc_ctype'] = {}
        all_values['lc_collate'] = {}
        all_values['lc_time'] = {
            #'abday': cldr_locale.abday,
            #'day': cldr_locale.day,
            #'abmon': cldr_locale.abmon,
            #'mon': cldr_locale.mon,
            #'am_pm': cldr_locale.am_pm,
            #'d_t_fmt': cldr_locale.d_t_fmt,
            #'d_fmt': cldr_locale.d_fmt,
            #'t_fmt': cldr_locale.t_fmt,
            #'t_fmt_ampm': cldr_locale.t_fmt_ampm,
            #'date_fmt': cldr_locale.date_fmt,
            #'week': cldr_locale.week,
            #'first_weekday': int(cldr_locale.first_weekday),
            #'first_workday': int(cldr_locale.first_workday),
        }
        all_values['lc_numeric'] = {
            #'decimal_point': cldr_locale.decimal_point,
            #'thousands_sep': cldr_locale.thousands_sep,
            'grouping': cldr_locale.grouping,
        }
        all_values['lc_monetary'] = {
            #'int_curr_symbol': cldr_locale.int_curr_symbol + ' ',
            #'currency_symbol': cldr_locale.currency_symbol,
        }
        all_values['lc_messages'] = {
        }
        all_values['lc_paper'] = {
            'paper_height': _int_or_none(cldr_locale.paper_height),
            'paper_width': _int_or_none(cldr_locale.paper_width),
            #'copy': cldr_locale.paper_copy,
        }
        # XXX: Need a data source for this.
        all_values['lc_name'] = {
        }
        all_values['lc_address'] = {
            #'postal_fmt':
            'country_name': cldr_locale.country_name,
            #'country_post':
            'country_ab2': cldr_locale.country_ab2,
            'country_ab3': cldr_locale.country_ab3,
            'country_num': _int_or_none(cldr_locale.country_num),
            #'country_car':
            #'country_isbn':
            'lang_name': cldr_locale.lang_name,
            'lang_ab': cldr_locale.lang,
            'lang_term': cldr_locale.lang_term,
            'lang_lib': cldr_locale.lang_lib,
        }
        all_values['lc_telephone'] = {
            'tel_int_fmt': cldr_locale.tel_int_fmt,
            'tel_dom_fmt': cldr_locale.tel_dom_fmt,
            'int_select': cldr_locale.int_select,
            'int_prefix': cldr_locale.int_prefix,
        }
        all_values['lc_measurement'] = {
            'measurement': _int_or_none(cldr_locale.measurement),
            'copy': cldr_locale.measurement_copy,
        }

        # Walk all the categories.
        for category in self.categories:
            lc = getattr(self, category)
            # Categories we know nothing about are passed through unchanged.
            values = all_values.get(category)
            if not values:
                continue

            # Walk each line in this locale category.
            start_of_line = None
            full_line = ''
            i = 0
            while i < len(lc.content):
                line = lc.content[i]
                if not line:
                    i += 1
                    continue

                # If the line ends with / it is wrapped, so unwrap it before
                # we check for updates to the value.
                if line.endswith('/'):
                    if not full_line:
                        start_of_line = i
                    full_line += line[:-1].lstrip()
                    i += 1
                    continue
                elif full_line:
                    line = full_line + line.lstrip()
                    full_line = ''
                else:
                    start_of_line = None

                # Process this line.
                key = line.split()[0]
                new_value = values.get(key)
                if new_value is not None:
                    is_int = isinstance(new_value, int)
                    if is_int:
                        new_value = str(new_value)
                        m = re.match(r'\s*(.*?)\s+([0-9]+)$', line)
                    else:
                        if isinstance(new_value, (tuple, list, set)):
                            new_value = '";"'.join(u_encode(x % cldr_values)
                                                   for x in new_value)
                        elif key != 'copy':
                            new_value %= cldr_values
                            if category != 'lc_identification':
                                new_value = u_encode(new_value)
                        m = re.match(r'\s*([^"]*)"(.*)"$', line)

                    # We should standardize case at some point.
                    if m and new_value.lower() != m.group(2).lower():
                        disp_key = ('%s:%s' % (category.upper(), key)
                                    if key == 'copy' else key)
                        logging.info('%s: %s: changing {%s} to {%s}',
                                     self.name, disp_key,
                                     u_decode(m.group(2)),
                                     u_decode(new_value))
                        leading_line = m.group(1)
                        # This is tricky as we have to delete most of the
                        # multiline, then update the one remaining.
                        if start_of_line is not None:
                            for _ in range(start_of_line, i):
                                lc.content.pop(start_of_line)
                            i = start_of_line
                        if '";"' in new_value:
                            leading_line = leading_line.rstrip() + '\t'
                            num_tabs = (len(leading_line) // 8) + 1
                            new_value = new_value.replace(
                                '";"', '";/\n' + ('\t' * num_tabs) + '"')
                        fmt = '%s %s' if is_int else '%s"%s"'
                        lc.content[i] = fmt % (leading_line, new_value)

                i += 1


def main(argv):
    """The main entry point."""
    parser = get_parser()
    opts = parser.parse_args(argv)
    logging_init(debug=opts.debug)

    # Get a handle to the cldr database.
    cldr = Cldr(opts.working_dir, opts.version)
    cldr.download()

    # Process all the locales the user told us to.
    for locale in opts.locales:
        name = os.path.basename(locale)

        # Skip a few known "bad" locales.
        if name.split('_', 1)[0] in ('iso14651', 'translit', 'C', 'POSIX'):
            continue

        logging.info('Updating %s', locale)
        try:
            loc = Locale(name=name, path=locale)
            try:
                loc.update_cldr(cldr)
            except Exception:  # pylint: disable=broad-except
                # Keep going so one bad locale does not stop the whole run.
                logging.error('%s: updating failed', locale, exc_info=True)
            loc.write(locale + '.new')
            os.rename(locale + '.new', locale)
        except UnicodeDecodeError:
            logging.error('%s: bad encodings', locale, exc_info=True)
            subprocess.check_call(['file', locale])
        except (IndexError, LocaleError):
            logging.error('%s: loading failed', locale, exc_info=True)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
Attachment:
signature.asc
Description: Digital signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |