locale.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 # Copyright 2009 Facebook
00004 #
00005 # Licensed under the Apache License, Version 2.0 (the "License"); you may
00006 # not use this file except in compliance with the License. You may obtain
00007 # a copy of the License at
00008 #
00009 #     http://www.apache.org/licenses/LICENSE-2.0
00010 #
00011 # Unless required by applicable law or agreed to in writing, software
00012 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
00013 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
00014 # License for the specific language governing permissions and limitations
00015 # under the License.
00016 
00017 """Translation methods for generating localized strings.
00018 
00019 To load a locale and generate a translated string::
00020 
00021     user_locale = locale.get("es_LA")
00022     print user_locale.translate("Sign out")
00023 
00024 locale.get() returns the closest matching locale, not necessarily the
00025 specific locale you requested. You can support pluralization with
00026 additional arguments to translate(), e.g.::
00027 
00028     people = [...]
00029     message = user_locale.translate(
00030         "%(list)s is online", "%(list)s are online", len(people))
00031     print message % {"list": user_locale.list(people)}
00032 
00033 The first string is chosen if len(people) == 1, otherwise the second
00034 string is chosen.
00035 
00036 Applications should call one of load_translations (which uses a simple
00037 CSV format) or load_gettext_translations (which uses the .mo format
00038 supported by gettext and related tools).  If neither method is called,
00039 the locale.translate method will simply return the original string.
00040 """
00041 
00042 from __future__ import absolute_import, division, with_statement
00043 
00044 import csv
00045 import datetime
00046 import logging
00047 import os
00048 import re
00049 
# Locale code of the untranslated source strings; replaced by
# set_default_locale().
_default_locale = "en_US"
# Loaded translations keyed by locale code. Rebuilt from scratch by
# load_translations() and load_gettext_translations().
_translations = {}
# Every locale code that can currently be resolved: the keys of
# _translations plus the default locale.
_supported_locales = frozenset([_default_locale])
# Set to True by load_gettext_translations(); Locale.get() checks it to
# decide between GettextLocale and CSVLocale.
_use_gettext = False
00054 
00055 
def get(*locale_codes):
    """Returns the closest match for the given locale codes.

    We iterate over all given locale codes in order. If we have a tight
    or a loose match for the code (e.g., "en" for "en_US"), we return
    the locale. Otherwise we move to the next code in the list.

    By default we return en_US if no translations are found for any of
    the specified locales. You can change the default locale with
    set_default_locale() below.

    This is a module-level shortcut that forwards all arguments to
    Locale.get_closest() and returns its result unchanged.
    """
    return Locale.get_closest(*locale_codes)
00068 
00069 
def set_default_locale(code):
    """Sets the default locale, used in Locale.get_closest().

    The default locale is assumed to be the language used for all strings
    in the system. The translations loaded from disk are mappings from
    the default locale to the destination locale. Consequently, you don't
    need to create a translation file for the default locale.

    `code` is the locale code (e.g. "en_US") to use as the new default.
    The set of supported locales is recomputed so that it always contains
    the new default in addition to every loaded translation.
    """
    global _default_locale
    global _supported_locales
    _default_locale = code
    # Iterate the dict directly instead of concatenating keys() with a
    # list: on Python 3 keys() is a view object that does not support "+".
    _supported_locales = frozenset(list(_translations) + [_default_locale])
00082 
00083 
def load_translations(directory):
    u"""Loads translations from CSV files in a directory.

    Translations are strings with optional Python-style named placeholders
    (e.g., "My name is %(name)s") and their associated translations.

    The directory should have translation files of the form LOCALE.csv,
    e.g. es_GT.csv. The CSV files should have two or three columns: string,
    translation, and an optional plural indicator. Plural indicators should
    be one of "plural" or "singular". A given string can have both singular
    and plural forms. For example "%(name)s liked this" may have a
    different verb conjugation depending on whether %(name)s is one
    name or a list of names. There should be two rows in the CSV file for
    that string, one with plural indicator "singular", and one "plural".
    For strings with no verbs that would change on translation, simply
    use "unknown" or the empty string (or don't include the column at all).

    The file is read using the csv module in the default "excel" dialect.
    In this format there should not be spaces after the commas. Files are
    expected to be UTF-8 encoded.

    Example translation es_LA.csv:

        "I love you","Te amo"
        "%(name)s liked this","A %(name)s les gust\u00f3 esto","plural"
        "%(name)s liked this","A %(name)s le gust\u00f3 esto","singular"

    Files whose name is not a valid locale code and rows with an unknown
    plural indicator are logged and skipped. Any previously loaded
    translations are discarded.
    """
    global _translations
    global _supported_locales
    global _use_gettext
    _translations = {}
    # The catalogs loaded here are plain dicts, so Locale.get() must not
    # wrap them in GettextLocale even if gettext catalogs were loaded
    # earlier in the process.
    _use_gettext = False
    # On Python 2 the csv module reads and yields byte strings; on
    # Python 3 it works on text.
    py2 = str is bytes
    for path in os.listdir(directory):
        if not path.endswith(".csv"):
            continue
        # splitext() copes with extra dots in the name (e.g. "es.old.csv"),
        # which then fails the locale check below instead of raising.
        locale = os.path.splitext(path)[0]
        full_path = os.path.join(directory, path)
        if not re.match("[a-z]+(_[A-Z]+)?$", locale):
            logging.error("Unrecognized locale %r (path: %s)", locale,
                          full_path)
            continue
        if py2:
            f = open(full_path, "r")
        else:
            f = open(full_path, "r", encoding="utf-8", newline="")
        # The with block closes the file even if a row fails to parse.
        with f:
            _translations[locale] = {}
            for i, row in enumerate(csv.reader(f)):
                if not row or len(row) < 2:
                    continue
                if py2:
                    row = [c.decode("utf-8") for c in row]
                row = [c.strip() for c in row]
                english, translation = row[:2]
                if len(row) > 2:
                    plural = row[2] or "unknown"
                else:
                    plural = "unknown"
                if plural not in ("plural", "singular", "unknown"):
                    logging.error(
                        "Unrecognized plural indicator %r in %s line %d",
                        plural, path, i + 1)
                    continue
                _translations[locale].setdefault(plural, {})[english] = \
                    translation
    _supported_locales = frozenset(list(_translations) + [_default_locale])
    logging.info("Supported locales: %s", sorted(_supported_locales))
00140     logging.info("Supported locales: %s", sorted(_supported_locales))
00141 
00142 
def load_gettext_translations(directory, domain):
    """Loads translations from gettext's locale tree

    Locale tree is similar to system's /usr/share/locale, like:

    {directory}/{lang}/LC_MESSAGES/{domain}.mo

    Three steps are required to have your app translated:

    1. Generate POT translation file
        xgettext --language=Python --keyword=_:1,2 -d cyclone file1.py file2.html etc

    2. Merge against existing POT file:
        msgmerge old.po cyclone.po > new.po

    3. Compile:
        msgfmt cyclone.po -o {directory}/pt_BR/LC_MESSAGES/cyclone.mo

    Hidden entries and plain files directly inside `directory` are
    ignored. A language whose catalog cannot be loaded is logged and
    skipped. Any previously loaded translations are discarded.
    """
    import gettext
    global _translations
    global _supported_locales
    global _use_gettext
    _translations = {}
    for lang in os.listdir(directory):
        if lang.startswith('.'):
            continue  # skip .svn, etc
        if os.path.isfile(os.path.join(directory, lang)):
            continue
        try:
            # stat() raises if the compiled catalog is missing, so the
            # failure is reported through the handler below.
            os.stat(os.path.join(directory, lang, "LC_MESSAGES",
                                 domain + ".mo"))
            _translations[lang] = gettext.translation(domain, directory,
                                                      languages=[lang])
        except Exception as e:
            # Best effort: one broken catalog must not prevent the other
            # languages from loading.
            logging.error("Cannot load translation for '%s': %s", lang, str(e))
            continue
    _supported_locales = frozenset(list(_translations) + [_default_locale])
    _use_gettext = True
    logging.info("Supported locales: %s", sorted(_supported_locales))
00181 
00182 
def get_supported_locales():
    """Returns all the supported locale codes.

    The result is the module's frozenset of codes (not a list): every
    locale that has loaded translations plus the default locale.
    """
    return _supported_locales
00186 
00187 
class Locale(object):
    """Object representing a locale.

    After calling one of `load_translations` or `load_gettext_translations`,
    call `get` or `get_closest` to get a Locale object.

    This base class provides date, list and number formatting on top of
    `translate`, which subclasses must implement.
    """
    @classmethod
    def get_closest(cls, *locale_codes):
        """Returns the closest match for the given locale codes.

        Codes are tried in order. Each one is normalized ("-" becomes "_",
        language lower-cased and region upper-cased for two-part codes) and
        matched first exactly and then by its language part alone. Empty
        codes and codes with more than two parts are skipped. If nothing
        matches, the Locale for the default locale is returned.
        """
        for code in locale_codes:
            if not code:
                continue
            code = code.replace("-", "_")
            parts = code.split("_")
            if len(parts) > 2:
                continue
            elif len(parts) == 2:
                code = parts[0].lower() + "_" + parts[1].upper()
            if code in _supported_locales:
                return cls.get(code)
            if parts[0].lower() in _supported_locales:
                return cls.get(parts[0].lower())
        return cls.get(_default_locale)

    @classmethod
    def get(cls, code):
        """Returns the Locale for the given locale code.

        If it is not supported, we raise an exception (AssertionError).

        Instances are created once per code and cached on the class.
        NOTE: the cache is not cleared when translations are reloaded.
        """
        if not hasattr(cls, "_cache"):
            cls._cache = {}
        if code not in cls._cache:
            assert code in _supported_locales
            translations = _translations.get(code, None)
            if translations is None:
                # Supported but without a catalog (e.g. the default
                # locale): every string translates to itself.
                locale = CSVLocale(code, {})
            elif _use_gettext:
                locale = GettextLocale(code, translations)
            else:
                locale = CSVLocale(code, translations)
            cls._cache[code] = locale
        return cls._cache[code]

    def __init__(self, code, translations):
        """Stores the locale code and catalog and pre-translates the
        month and weekday names used by the date formatters.

        `translations` is the catalog consumed by the subclass's
        `translate` implementation.
        """
        self.code = code
        self.name = LOCALE_NAMES.get(code, {}).get("name", u"Unknown")
        # Codes starting with "fa", "ar" or "he" are flagged right-to-left.
        self.rtl = self.code.startswith(("fa", "ar", "he"))
        self.translations = translations

        # Initialize strings for date formatting
        _ = self.translate
        self._months = [
            _("January"), _("February"), _("March"), _("April"),
            _("May"), _("June"), _("July"), _("August"),
            _("September"), _("October"), _("November"), _("December")]
        self._weekdays = [
            _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"),
            _("Friday"), _("Saturday"), _("Sunday")]

    def translate(self, message, plural_message=None, count=None):
        """Returns the translation for the given message for this locale.

        If plural_message is given, you must also provide count. We return
        plural_message when count != 1, and we return the singular form
        for the given message when count == 1.
        """
        raise NotImplementedError()

    def format_date(self, date, gmt_offset=0, relative=True, shorter=False,
                    full_format=False):
        """Formats the given date (which should be GMT).

        `date` is either a naive datetime or a numeric Unix timestamp.
        `gmt_offset` is in minutes and is subtracted from the date to
        obtain the local time that is displayed.

        By default, we return a relative time (e.g., "2 minutes ago"). You
        can return an absolute date string with relative=False.

        You can force a full format date ("July 10, 1980") with
        full_format=True. With shorter=True the time of day is left out
        of absolute dates.

        This method is primarily intended for dates in the past.
        For dates in the future, we fall back to full format.
        """
        if self.code.startswith("ru"):
            relative = False
        try:
            numeric_types = (int, long, float)
        except NameError:
            # Python 3 has no separate ``long`` type.
            numeric_types = (int, float)
        if isinstance(date, numeric_types):
            date = datetime.datetime.utcfromtimestamp(date)
        now = datetime.datetime.utcnow()
        if date > now:
            # Compare the whole timedelta: its ``seconds`` attribute alone
            # ignores the ``days`` part, which would treat "one day and
            # ten seconds ahead" as "ten seconds ahead".
            if relative and (date - now) < datetime.timedelta(seconds=60):
                # Due to clock skew, some things are slightly
                # in the future. Round timestamps in the immediate
                # future down to now in relative mode.
                date = now
            else:
                # Otherwise, future dates always use the full format.
                full_format = True
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        local_now = now - datetime.timedelta(minutes=gmt_offset)
        local_yesterday = local_now - datetime.timedelta(hours=24)
        difference = now - date
        seconds = difference.seconds
        days = difference.days

        _ = self.translate
        format = None
        if not full_format:
            if relative and days == 0:
                if seconds < 50:
                    return _("1 second ago", "%(seconds)d seconds ago",
                             seconds) % {"seconds": seconds}

                if seconds < 50 * 60:
                    # int() keeps the plural count integral on Python 2,
                    # where round() returns a float.
                    minutes = int(round(seconds / 60.0))
                    return _("1 minute ago", "%(minutes)d minutes ago",
                             minutes) % {"minutes": minutes}

                hours = int(round(seconds / (60.0 * 60)))
                return _("1 hour ago", "%(hours)d hours ago",
                         hours) % {"hours": hours}

            if days == 0:
                format = _("%(time)s")
            elif days == 1 and local_date.day == local_yesterday.day and \
                    relative:
                format = _("yesterday") if shorter else \
                    _("yesterday at %(time)s")
            elif days < 5:
                format = _("%(weekday)s") if shorter else \
                    _("%(weekday)s at %(time)s")
            elif days < 334:  # 11mo, since confusing for same month last year
                format = _("%(month_name)s %(day)s") if shorter else \
                    _("%(month_name)s %(day)s at %(time)s")

        if format is None:
            format = _("%(month_name)s %(day)s, %(year)s") if shorter else \
                _("%(month_name)s %(day)s, %(year)s at %(time)s")

        # Only "en", "en_US" and "zh_CN" use a 12-hour clock here.
        tfhour_clock = self.code not in ("en", "en_US", "zh_CN")
        if tfhour_clock:
            str_time = "%d:%02d" % (local_date.hour, local_date.minute)
        elif self.code == "zh_CN":
            str_time = "%s%d:%02d" % (
                (u'\u4e0a\u5348', u'\u4e0b\u5348')[local_date.hour >= 12],
                local_date.hour % 12 or 12, local_date.minute)
        else:
            str_time = "%d:%02d %s" % (
                local_date.hour % 12 or 12, local_date.minute,
                ("am", "pm")[local_date.hour >= 12])

        return format % {
            "month_name": self._months[local_date.month - 1],
            "weekday": self._weekdays[local_date.weekday()],
            "day": str(local_date.day),
            "year": str(local_date.year),
            "time": str_time
        }

    def format_day(self, date, gmt_offset=0, dow=True):
        """Formats the given date as a day of week.

        Example: "Monday, January 22". You can remove the day of week with
        dow=False.

        `date` must be a datetime; `gmt_offset` is in minutes and is
        subtracted from it before formatting.
        """
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        _ = self.translate
        if dow:
            return _("%(weekday)s, %(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "weekday": self._weekdays[local_date.weekday()],
                "day": str(local_date.day),
            }
        else:
            return _("%(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "day": str(local_date.day),
            }

    def list(self, parts):
        """Returns a comma-separated list for the given list of parts.

        The format is, e.g., "A, B and C", "A and B" or just "A" for lists
        of size 1. An empty list gives the empty string.
        """
        _ = self.translate
        if len(parts) == 0:
            return ""
        if len(parts) == 1:
            return parts[0]
        # Locales whose code starts with "fa" join items with
        # u' \u0648 ' instead of a comma.
        comma = u' \u0648 ' if self.code.startswith("fa") else u", "
        return _("%(commas)s and %(last)s") % {
            "commas": comma.join(parts[:-1]),
            "last": parts[-1],
        }

    def friendly_number(self, value):
        """Returns a comma-separated number for the given integer.

        Grouping is only applied for the "en" and "en_US" locales; other
        locales get the plain str() of the value. A leading minus sign is
        kept in front of the grouped digits.
        """
        if self.code not in ("en", "en_US"):
            return str(value)
        digits = str(value)
        sign = ""
        if digits.startswith("-"):
            # Keep the sign out of the grouping so that -123 does not
            # turn into "-,123".
            sign, digits = "-", digits[1:]
        parts = []
        while digits:
            parts.append(digits[-3:])
            digits = digits[:-3]
        return sign + ",".join(reversed(parts))
00396 
00397 
class CSVLocale(Locale):
    """Locale implementation using tornado's CSV translation format."""
    def translate(self, message, plural_message=None, count=None):
        """Looks the message up in the CSV catalog for this locale.

        The catalog is split into "unknown", "singular" and "plural"
        sections. Without plural_message the "unknown" section is used;
        otherwise count picks "singular" (count == 1) or "plural", and in
        the plural case plural_message is the key. The untranslated key
        is returned when the catalog has no entry for it.
        """
        if plural_message is None:
            section = "unknown"
        else:
            assert count is not None
            if count == 1:
                section = "singular"
            else:
                section = "plural"
                message = plural_message
        catalog = self.translations.get(section, {})
        return catalog.get(message, message)
00411 
00412 
class GettextLocale(Locale):
    """Locale implementation using the gettext module."""
    def translate(self, message, plural_message=None, count=None):
        """Translates the message through the gettext catalog.

        If plural_message is given, count must be given too and the
        catalog's plural lookup chooses the form; otherwise the plain
        lookup is used.
        """
        catalog = self.translations
        if plural_message is not None:
            assert count is not None
            # Python 2 catalogs return unicode from ungettext(); Python 3
            # catalogs only provide ngettext().
            ngettext = getattr(catalog, "ungettext", None) or catalog.ngettext
            return ngettext(message, plural_message, count)
        # Same split for the singular lookup: ugettext() vs. gettext().
        lookup = getattr(catalog, "ugettext", None) or catalog.gettext
        return lookup(message)
00421 
# Display names per locale code: "name_en" is the English name and "name"
# is the name written in the locale's own language.
LOCALE_NAMES = {
    "af_ZA": {"name_en": u"Afrikaans", "name": u"Afrikaans"},
    "am_ET": {"name_en": u"Amharic", "name": u'\u12a0\u121b\u122d\u129b'},
    "ar_AR": {"name_en": u"Arabic", "name": u"\u0627\u0644\u0639\u0631\u0628\u064a\u0629"},
    "bg_BG": {"name_en": u"Bulgarian", "name": u"\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"},
    "bn_IN": {"name_en": u"Bengali", "name": u"\u09ac\u09be\u0982\u09b2\u09be"},
    "bs_BA": {"name_en": u"Bosnian", "name": u"Bosanski"},
    "ca_ES": {"name_en": u"Catalan", "name": u"Catal\xe0"},
    "cs_CZ": {"name_en": u"Czech", "name": u"\u010ce\u0161tina"},
    "cy_GB": {"name_en": u"Welsh", "name": u"Cymraeg"},
    "da_DK": {"name_en": u"Danish", "name": u"Dansk"},
    "de_DE": {"name_en": u"German", "name": u"Deutsch"},
    "el_GR": {"name_en": u"Greek", "name": u"\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"},
    "en_GB": {"name_en": u"English (UK)", "name": u"English (UK)"},
    "en_US": {"name_en": u"English (US)", "name": u"English (US)"},
    "es_ES": {"name_en": u"Spanish (Spain)", "name": u"Espa\xf1ol (Espa\xf1a)"},
    "es_LA": {"name_en": u"Spanish", "name": u"Espa\xf1ol"},
    "et_EE": {"name_en": u"Estonian", "name": u"Eesti"},
    "eu_ES": {"name_en": u"Basque", "name": u"Euskara"},
    "fa_IR": {"name_en": u"Persian", "name": u"\u0641\u0627\u0631\u0633\u06cc"},
    "fi_FI": {"name_en": u"Finnish", "name": u"Suomi"},
    "fr_CA": {"name_en": u"French (Canada)", "name": u"Fran\xe7ais (Canada)"},
    "fr_FR": {"name_en": u"French", "name": u"Fran\xe7ais"},
    "ga_IE": {"name_en": u"Irish", "name": u"Gaeilge"},
    "gl_ES": {"name_en": u"Galician", "name": u"Galego"},
    "he_IL": {"name_en": u"Hebrew", "name": u"\u05e2\u05d1\u05e8\u05d9\u05ea"},
    "hi_IN": {"name_en": u"Hindi", "name": u"\u0939\u093f\u0928\u094d\u0926\u0940"},
    "hr_HR": {"name_en": u"Croatian", "name": u"Hrvatski"},
    "hu_HU": {"name_en": u"Hungarian", "name": u"Magyar"},
    "id_ID": {"name_en": u"Indonesian", "name": u"Bahasa Indonesia"},
    "is_IS": {"name_en": u"Icelandic", "name": u"\xcdslenska"},
    "it_IT": {"name_en": u"Italian", "name": u"Italiano"},
    "ja_JP": {"name_en": u"Japanese", "name": u"\u65e5\u672c\u8a9e"},
    "ko_KR": {"name_en": u"Korean", "name": u"\ud55c\uad6d\uc5b4"},
    "lt_LT": {"name_en": u"Lithuanian", "name": u"Lietuvi\u0173"},
    "lv_LV": {"name_en": u"Latvian", "name": u"Latvie\u0161u"},
    "mk_MK": {"name_en": u"Macedonian", "name": u"\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"},
    "ml_IN": {"name_en": u"Malayalam", "name": u"\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"},
    "ms_MY": {"name_en": u"Malay", "name": u"Bahasa Melayu"},
    "nb_NO": {"name_en": u"Norwegian (bokmal)", "name": u"Norsk (bokm\xe5l)"},
    "nl_NL": {"name_en": u"Dutch", "name": u"Nederlands"},
    "nn_NO": {"name_en": u"Norwegian (nynorsk)", "name": u"Norsk (nynorsk)"},
    "pa_IN": {"name_en": u"Punjabi", "name": u"\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"},
    "pl_PL": {"name_en": u"Polish", "name": u"Polski"},
    "pt_BR": {"name_en": u"Portuguese (Brazil)", "name": u"Portugu\xeas (Brasil)"},
    "pt_PT": {"name_en": u"Portuguese (Portugal)", "name": u"Portugu\xeas (Portugal)"},
    "ro_RO": {"name_en": u"Romanian", "name": u"Rom\xe2n\u0103"},
    "ru_RU": {"name_en": u"Russian", "name": u"\u0420\u0443\u0441\u0441\u043a\u0438\u0439"},
    "sk_SK": {"name_en": u"Slovak", "name": u"Sloven\u010dina"},
    "sl_SI": {"name_en": u"Slovenian", "name": u"Sloven\u0161\u010dina"},
    "sq_AL": {"name_en": u"Albanian", "name": u"Shqip"},
    "sr_RS": {"name_en": u"Serbian", "name": u"\u0421\u0440\u043f\u0441\u043a\u0438"},
    "sv_SE": {"name_en": u"Swedish", "name": u"Svenska"},
    "sw_KE": {"name_en": u"Swahili", "name": u"Kiswahili"},
    "ta_IN": {"name_en": u"Tamil", "name": u"\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"},
    "te_IN": {"name_en": u"Telugu", "name": u"\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"},
    "th_TH": {"name_en": u"Thai", "name": u"\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22"},
    "tl_PH": {"name_en": u"Filipino", "name": u"Filipino"},
    "tr_TR": {"name_en": u"Turkish", "name": u"T\xfcrk\xe7e"},
    "uk_UA": {"name_en": u"Ukrainian", "name": u"\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"},
    "vi_VN": {"name_en": u"Vietnamese", "name": u"Ti\u1ebfng Vi\u1ec7t"},
    "zh_CN": {"name_en": u"Chinese (Simplified)", "name": u"\u4e2d\u6587(\u7b80\u4f53)"},
    "zh_TW": {"name_en": u"Chinese (Traditional)", "name": u"\u4e2d\u6587(\u7e41\u9ad4)"},
}


roswww
Author(s): Jonathan Mace
autogenerated on Thu Jan 2 2014 11:53:30