00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 """Translation methods for generating localized strings.
00018
00019 To load a locale and generate a translated string::
00020
    user_locale = tornado.locale.get("es_LA")
    print(user_locale.translate("Sign out"))
00023
00024 `tornado.locale.get()` returns the closest matching locale, not necessarily the
00025 specific locale you requested. You can support pluralization with
00026 additional arguments to `~Locale.translate()`, e.g.::
00027
    people = [...]
    message = user_locale.translate(
        "%(list)s is online", "%(list)s are online", len(people))
    print(message % {"list": user_locale.list(people)})
00032
00033 The first string is chosen if ``len(people) == 1``, otherwise the second
00034 string is chosen.
00035
00036 Applications should call one of `load_translations` (which uses a simple
00037 CSV format) or `load_gettext_translations` (which uses the ``.mo`` format
00038 supported by `gettext` and related tools). If neither method is called,
00039 the `Locale.translate` method will simply return the original string.
00040 """
00041
00042 from __future__ import absolute_import, division, print_function, with_statement
00043
00044 import csv
00045 import datetime
00046 import numbers
00047 import os
00048 import re
00049
00050 from tornado import escape
00051 from tornado.log import gen_log
00052 from tornado.util import u
00053
# Locale code that Locale.get_closest() falls back to when none of the
# requested codes is supported; replaced by set_default_locale().
_default_locale = "en_US"
# Loaded translations keyed by locale code.  Rebuilt from scratch by
# load_translations() (nested dicts) and load_gettext_translations()
# (gettext translation objects).
_translations = {}
# Every code Locale.get() accepts: the loaded locales plus the default one.
_supported_locales = frozenset([_default_locale])
# Set to True by load_gettext_translations(); Locale.get() reads it to decide
# between GettextLocale and CSVLocale.
_use_gettext = False
00058
00059
def get(*locale_codes):
    """Looks up the best available locale for the given locale codes.

    The codes are tried in the order given.  A code is accepted when it
    matches a supported locale exactly or loosely (e.g., a request for
    "en_US" is satisfied by a supported "en" locale); otherwise the next
    code is tried.

    When none of the codes can be matched, the default locale is returned.
    That is ``en_US`` unless it was changed with `set_default_locale()`.
    """
    closest = Locale.get_closest(*locale_codes)
    return closest
00072
00073
def set_default_locale(code):
    """Changes the default locale to ``code``.

    All strings in the system are assumed to be written in the default
    locale, and translation files loaded from disk map from the default
    locale to a destination locale.  For that reason no translation file
    is needed for the default locale itself.
    """
    global _default_locale
    global _supported_locales
    _default_locale = code
    # The supported set is always "everything loaded" plus the default.
    known_codes = list(_translations.keys())
    known_codes.append(code)
    _supported_locales = frozenset(known_codes)
00086
00087
def load_translations(directory):
    """Loads translations from CSV files in a directory.

    Translations are strings with optional Python-style named placeholders
    (e.g., ``My name is %(name)s``) and their associated translations.

    The directory should have translation files of the form ``LOCALE.csv``,
    e.g. ``es_GT.csv``. The CSV files should have two or three columns: string,
    translation, and an optional plural indicator. Plural indicators should
    be one of "plural" or "singular". A given string can have both singular
    and plural forms. For example ``%(name)s liked this`` may have a
    different verb conjugation depending on whether %(name)s is one
    name or a list of names. There should be two rows in the CSV file for
    that string, one with plural indicator "singular", and one "plural".
    For strings with no verbs that would change on translation, simply
    use "unknown" or the empty string (or don't include the column at all).

    The file is read using the `csv` module in the default "excel" dialect.
    In this format there should not be spaces after the commas.

    Files whose name (minus the ``.csv`` suffix) does not look like a locale
    code, and rows with an unrecognized plural indicator, are logged and
    skipped.  Rows with fewer than two columns are silently ignored.

    Example translation ``es_LA.csv``::

        "I love you","Te amo"
        "%(name)s liked this","A %(name)s les gustó esto","plural"
        "%(name)s liked this","A %(name)s le gustó esto","singular"

    """
    global _translations
    global _supported_locales
    global _use_gettext
    _translations = {}
    for path in os.listdir(directory):
        if not path.endswith(".csv"):
            continue
        # Strip only the trailing ".csv".  Splitting on every "." would fail
        # to unpack for names such as "es.LA.csv" and abort the whole load;
        # this way such names reach the pattern check below and are skipped.
        locale = path[:-len(".csv")]
        if not re.match(r"[a-z]+(_[A-Z]+)?$", locale):
            gen_log.error("Unrecognized locale %r (path: %s)", locale,
                          os.path.join(directory, path))
            continue
        full_path = os.path.join(directory, path)
        try:
            # Python 3: csv.reader needs a text-mode file; force UTF-8 so the
            # result does not depend on the environment's default encoding.
            f = open(full_path, "r", encoding="utf-8")
        except TypeError:
            # Python 2: the built-in open() takes no ``encoding`` argument.
            # Cells are decoded below with escape.to_unicode().
            f = open(full_path, "r")
        _translations[locale] = {}
        # ``with`` guarantees the file is closed even if parsing raises.
        with f:
            for i, row in enumerate(csv.reader(f)):
                if len(row) < 2:
                    continue
                row = [escape.to_unicode(c).strip() for c in row]
                english, translation = row[:2]
                if len(row) > 2:
                    plural = row[2] or "unknown"
                else:
                    plural = "unknown"
                if plural not in ("plural", "singular", "unknown"):
                    gen_log.error(
                        "Unrecognized plural indicator %r in %s line %d",
                        plural, path, i + 1)
                    continue
                _translations[locale].setdefault(plural, {})[english] = \
                    translation
    _supported_locales = frozenset(list(_translations.keys()) + [_default_locale])
    # The tables loaded here are plain dicts, so make sure Locale.get() does
    # not try to wrap them in GettextLocale after an earlier gettext load.
    _use_gettext = False
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))
00152
00153
def load_gettext_translations(directory, domain):
    """Loads translations from a `gettext` locale tree.

    The tree is laid out like the system's ``/usr/share/locale``::

        {directory}/{lang}/LC_MESSAGES/{domain}.mo

    Entries of ``directory`` whose name starts with a dot, and entries that
    are plain files, are ignored.  A language whose ``.mo`` file is missing
    or cannot be loaded is logged and skipped.

    Three steps are required to have your app translated:

    1. Generate POT translation file::

        xgettext --language=Python --keyword=_:1,2 -d mydomain file1.py file2.html etc

    2. Merge against existing POT file::

        msgmerge old.po mydomain.po > new.po

    3. Compile::

        msgfmt mydomain.po -o {directory}/pt_BR/LC_MESSAGES/mydomain.mo
    """
    import gettext
    global _translations
    global _supported_locales
    global _use_gettext
    _translations = {}
    for entry in os.listdir(directory):
        hidden = entry.startswith('.')
        if hidden or os.path.isfile(os.path.join(directory, entry)):
            continue
        try:
            # os.stat() raises if the catalog is absent, which sends this
            # language down the same "log and skip" path as a load failure.
            os.stat(os.path.join(directory, entry, "LC_MESSAGES", domain + ".mo"))
            catalog = gettext.translation(domain, directory,
                                          languages=[entry])
            _translations[entry] = catalog
        except Exception as err:
            gen_log.error("Cannot load translation for '%s': %s", entry, str(err))
    loaded_codes = list(_translations.keys())
    _supported_locales = frozenset(loaded_codes + [_default_locale])
    _use_gettext = True
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))
00195
00196
def get_supported_locales():
    """Returns the set of all supported locale codes.

    The value is the module-level ``frozenset`` holding the default locale
    plus every locale for which translations have been loaded.
    """
    return _supported_locales
00200
00201
class Locale(object):
    """Object representing a locale.

    After calling one of `load_translations` or `load_gettext_translations`,
    call `get` or `get_closest` to get a Locale object.

    Subclasses must implement `translate`; the formatting helpers here are
    built on top of it.
    """
    @classmethod
    def get_closest(cls, *locale_codes):
        """Returns the closest match for the given locale codes.

        Codes are tried in order.  Each code is normalized (``-`` becomes
        ``_``, language lower-cased, region upper-cased) and accepted if
        either the full code or just its language part is supported.  Empty
        codes and codes with more than two parts are skipped.  If nothing
        matches, the default locale is returned.
        """
        for code in locale_codes:
            if not code:
                continue
            code = code.replace("-", "_")
            parts = code.split("_")
            if len(parts) > 2:
                continue
            elif len(parts) == 2:
                code = parts[0].lower() + "_" + parts[1].upper()
            if code in _supported_locales:
                return cls.get(code)
            if parts[0].lower() in _supported_locales:
                return cls.get(parts[0].lower())
        return cls.get(_default_locale)

    @classmethod
    def get(cls, code):
        """Returns the Locale for the given locale code.

        Instances are created on first use and cached on the class.  If the
        code is not supported the ``assert`` below fails with an
        ``AssertionError``.
        """
        if not hasattr(cls, "_cache"):
            cls._cache = {}
        if code not in cls._cache:
            assert code in _supported_locales
            translations = _translations.get(code, None)
            if translations is None:
                # Supported but nothing loaded (the default locale):
                # an empty CSV table makes translate() return its input.
                locale = CSVLocale(code, {})
            elif _use_gettext:
                locale = GettextLocale(code, translations)
            else:
                locale = CSVLocale(code, translations)
            cls._cache[code] = locale
        return cls._cache[code]

    def __init__(self, code, translations):
        """Stores the locale code and translations and caches date names.

        ``translations`` is kept as-is in ``self.translations``; its
        interpretation is left to the subclass's `translate`.
        """
        self.code = code
        self.name = LOCALE_NAMES.get(code, {}).get("name", u("Unknown"))
        # Codes starting with one of these prefixes are flagged as
        # right-to-left.
        self.rtl = self.code.startswith(("fa", "ar", "he"))
        self.translations = translations

        # Translate month and weekday names once; format_date() and
        # format_day() index into these lists.
        _ = self.translate
        self._months = [
            _("January"), _("February"), _("March"), _("April"),
            _("May"), _("June"), _("July"), _("August"),
            _("September"), _("October"), _("November"), _("December")]
        self._weekdays = [
            _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"),
            _("Friday"), _("Saturday"), _("Sunday")]

    def translate(self, message, plural_message=None, count=None):
        """Returns the translation for the given message for this locale.

        If ``plural_message`` is given, you must also provide
        ``count``. We return ``plural_message`` when ``count != 1``,
        and we return the singular form for the given message when
        ``count == 1``.

        The base class raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def format_date(self, date, gmt_offset=0, relative=True, shorter=False,
                    full_format=False):
        """Formats the given date (which should be GMT).

        ``date`` may be a ``datetime`` or a numeric timestamp, which is
        converted with ``utcfromtimestamp``.  ``gmt_offset`` is a number of
        minutes that is subtracted from the date to obtain local time.

        By default, we return a relative time (e.g., "2 minutes ago"). You
        can return an absolute date string with ``relative=False``.

        You can force a full format date ("July 10, 1980") with
        ``full_format=True``.  ``shorter=True`` drops the time of day from
        the absolute formats.

        This method is primarily intended for dates in the past.
        For dates in the future, we fall back to full format, except that in
        relative mode a date less than a minute ahead is treated as "now".
        """
        if isinstance(date, numbers.Real):
            date = datetime.datetime.utcfromtimestamp(date)
        now = datetime.datetime.utcnow()
        if date > now:
            ahead = date - now
            # ``timedelta.seconds`` only holds the sub-day remainder, so the
            # day count must be checked too; otherwise a date several days
            # (plus a few seconds) ahead would be mistaken for "now".
            if relative and ahead.days == 0 and ahead.seconds < 60:
                # Tolerate small clock skew: round the immediate future
                # down to now in relative mode.
                date = now
            else:
                # Any other future date always uses the full format.
                full_format = True
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        local_now = now - datetime.timedelta(minutes=gmt_offset)
        local_yesterday = local_now - datetime.timedelta(hours=24)
        difference = now - date
        seconds = difference.seconds
        days = difference.days

        _ = self.translate
        fmt = None
        if not full_format:
            if relative and days == 0:
                if seconds < 50:
                    return _("1 second ago", "%(seconds)d seconds ago",
                             seconds) % {"seconds": seconds}

                if seconds < 50 * 60:
                    minutes = round(seconds / 60.0)
                    return _("1 minute ago", "%(minutes)d minutes ago",
                             minutes) % {"minutes": minutes}

                hours = round(seconds / (60.0 * 60))
                return _("1 hour ago", "%(hours)d hours ago",
                         hours) % {"hours": hours}

            if days == 0:
                fmt = _("%(time)s")
            elif days == 1 and local_date.day == local_yesterday.day and \
                    relative:
                fmt = _("yesterday") if shorter else \
                    _("yesterday at %(time)s")
            elif days < 5:
                fmt = _("%(weekday)s") if shorter else \
                    _("%(weekday)s at %(time)s")
            elif days < 334:
                fmt = _("%(month_name)s %(day)s") if shorter else \
                    _("%(month_name)s %(day)s at %(time)s")

        if fmt is None:
            fmt = _("%(month_name)s %(day)s, %(year)s") if shorter else \
                _("%(month_name)s %(day)s, %(year)s at %(time)s")

        # Only "en", "en_US" and "zh_CN" get a 12-hour clock.
        tfhour_clock = self.code not in ("en", "en_US", "zh_CN")
        if tfhour_clock:
            str_time = "%d:%02d" % (local_date.hour, local_date.minute)
        elif self.code == "zh_CN":
            str_time = "%s%d:%02d" % (
                (u('\u4e0a\u5348'), u('\u4e0b\u5348'))[local_date.hour >= 12],
                local_date.hour % 12 or 12, local_date.minute)
        else:
            str_time = "%d:%02d %s" % (
                local_date.hour % 12 or 12, local_date.minute,
                ("am", "pm")[local_date.hour >= 12])

        return fmt % {
            "month_name": self._months[local_date.month - 1],
            "weekday": self._weekdays[local_date.weekday()],
            "day": str(local_date.day),
            "year": str(local_date.year),
            "time": str_time
        }

    def format_day(self, date, gmt_offset=0, dow=True):
        """Formats the given date as a day of week.

        Example: "Monday, January 22". You can remove the day of week with
        ``dow=False``.  ``gmt_offset`` is a number of minutes subtracted
        from ``date`` before formatting.
        """
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        _ = self.translate
        if dow:
            return _("%(weekday)s, %(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "weekday": self._weekdays[local_date.weekday()],
                "day": str(local_date.day),
            }
        else:
            return _("%(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "day": str(local_date.day),
            }

    def list(self, parts):
        """Returns a comma-separated list for the given list of parts.

        The format is, e.g., "A, B and C", "A and B" or just "A" for lists
        of size 1.  An empty list gives the empty string.
        """
        _ = self.translate
        if len(parts) == 0:
            return ""
        if len(parts) == 1:
            return parts[0]
        # Codes starting with "fa" use a different separator.
        comma = u(' \u0648 ') if self.code.startswith("fa") else u(", ")
        return _("%(commas)s and %(last)s") % {
            "commas": comma.join(parts[:-1]),
            "last": parts[len(parts) - 1],
        }

    def friendly_number(self, value):
        """Returns a comma-separated number for the given integer.

        Grouping is only applied for the "en" and "en_US" locales; every
        other locale gets plain ``str(value)``.
        """
        if self.code not in ("en", "en_US"):
            return str(value)
        digits = str(value)
        # Keep a leading minus sign out of the grouping so that, e.g.,
        # -123456 becomes "-123,456" rather than "-,123,456".
        sign = ""
        if digits.startswith("-"):
            sign = "-"
            digits = digits[1:]
        parts = []
        while digits:
            parts.append(digits[-3:])
            digits = digits[:-3]
        return sign + ",".join(reversed(parts))
00409
00410
class CSVLocale(Locale):
    """Locale backed by translation tables in tornado's CSV format."""
    def translate(self, message, plural_message=None, count=None):
        """Looks up ``message`` in the table matching its plural form.

        Without ``plural_message`` the "unknown" table is used.  With it,
        ``count`` is required: ``count == 1`` looks up ``message`` in the
        "singular" table, any other count looks up ``plural_message`` in the
        "plural" table.  A string with no entry is returned untranslated.
        """
        if plural_message is None:
            table_name = "unknown"
        else:
            assert count is not None
            if count == 1:
                table_name = "singular"
            else:
                message = plural_message
                table_name = "plural"
        table = self.translations.get(table_name, {})
        return table.get(message, message)
00424
00425
class GettextLocale(Locale):
    """Locale backed by a translation object from the `gettext` module."""
    def __init__(self, code, translations):
        """Binds the lookup functions, then runs the base initializer."""
        try:
            # Prefer the ``u``-prefixed methods when the object has them.
            plural_lookup, single_lookup = (translations.ungettext,
                                            translations.ugettext)
        except AttributeError:
            # Otherwise fall back to the plain method names.
            plural_lookup, single_lookup = (translations.ngettext,
                                            translations.gettext)
        self.ngettext = plural_lookup
        self.gettext = single_lookup
        # The lookups must be in place before the base __init__ runs,
        # because it calls self.translate.
        super(GettextLocale, self).__init__(code, translations)

    def translate(self, message, plural_message=None, count=None):
        """Translates ``message``, using the plural lookup when
        ``plural_message`` is given (``count`` is then required).
        """
        if plural_message is None:
            return self.gettext(message)
        assert count is not None
        return self.ngettext(message, plural_message, count)
00447
# Display names for known locale codes.  "name_en" is the English name and
# "name" is the name in the locale's own language; Locale.__init__ reads
# "name" and falls back to "Unknown" for codes missing from this table.
LOCALE_NAMES = {
    "af_ZA": {"name_en": u("Afrikaans"), "name": u("Afrikaans")},
    "am_ET": {"name_en": u("Amharic"), "name": u('\u12a0\u121b\u122d\u129b')},
    "ar_AR": {"name_en": u("Arabic"), "name": u("\u0627\u0644\u0639\u0631\u0628\u064a\u0629")},
    "bg_BG": {"name_en": u("Bulgarian"), "name": u("\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438")},
    "bn_IN": {"name_en": u("Bengali"), "name": u("\u09ac\u09be\u0982\u09b2\u09be")},
    "bs_BA": {"name_en": u("Bosnian"), "name": u("Bosanski")},
    "ca_ES": {"name_en": u("Catalan"), "name": u("Catal\xe0")},
    "cs_CZ": {"name_en": u("Czech"), "name": u("\u010ce\u0161tina")},
    "cy_GB": {"name_en": u("Welsh"), "name": u("Cymraeg")},
    "da_DK": {"name_en": u("Danish"), "name": u("Dansk")},
    "de_DE": {"name_en": u("German"), "name": u("Deutsch")},
    "el_GR": {"name_en": u("Greek"), "name": u("\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac")},
    "en_GB": {"name_en": u("English (UK)"), "name": u("English (UK)")},
    "en_US": {"name_en": u("English (US)"), "name": u("English (US)")},
    "es_ES": {"name_en": u("Spanish (Spain)"), "name": u("Espa\xf1ol (Espa\xf1a)")},
    "es_LA": {"name_en": u("Spanish"), "name": u("Espa\xf1ol")},
    "et_EE": {"name_en": u("Estonian"), "name": u("Eesti")},
    "eu_ES": {"name_en": u("Basque"), "name": u("Euskara")},
    "fa_IR": {"name_en": u("Persian"), "name": u("\u0641\u0627\u0631\u0633\u06cc")},
    "fi_FI": {"name_en": u("Finnish"), "name": u("Suomi")},
    "fr_CA": {"name_en": u("French (Canada)"), "name": u("Fran\xe7ais (Canada)")},
    "fr_FR": {"name_en": u("French"), "name": u("Fran\xe7ais")},
    "ga_IE": {"name_en": u("Irish"), "name": u("Gaeilge")},
    "gl_ES": {"name_en": u("Galician"), "name": u("Galego")},
    "he_IL": {"name_en": u("Hebrew"), "name": u("\u05e2\u05d1\u05e8\u05d9\u05ea")},
    "hi_IN": {"name_en": u("Hindi"), "name": u("\u0939\u093f\u0928\u094d\u0926\u0940")},
    "hr_HR": {"name_en": u("Croatian"), "name": u("Hrvatski")},
    "hu_HU": {"name_en": u("Hungarian"), "name": u("Magyar")},
    "id_ID": {"name_en": u("Indonesian"), "name": u("Bahasa Indonesia")},
    "is_IS": {"name_en": u("Icelandic"), "name": u("\xcdslenska")},
    "it_IT": {"name_en": u("Italian"), "name": u("Italiano")},
    "ja_JP": {"name_en": u("Japanese"), "name": u("\u65e5\u672c\u8a9e")},
    "ko_KR": {"name_en": u("Korean"), "name": u("\ud55c\uad6d\uc5b4")},
    "lt_LT": {"name_en": u("Lithuanian"), "name": u("Lietuvi\u0173")},
    "lv_LV": {"name_en": u("Latvian"), "name": u("Latvie\u0161u")},
    "mk_MK": {"name_en": u("Macedonian"), "name": u("\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438")},
    "ml_IN": {"name_en": u("Malayalam"), "name": u("\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02")},
    "ms_MY": {"name_en": u("Malay"), "name": u("Bahasa Melayu")},
    "nb_NO": {"name_en": u("Norwegian (bokmal)"), "name": u("Norsk (bokm\xe5l)")},
    "nl_NL": {"name_en": u("Dutch"), "name": u("Nederlands")},
    "nn_NO": {"name_en": u("Norwegian (nynorsk)"), "name": u("Norsk (nynorsk)")},
    "pa_IN": {"name_en": u("Punjabi"), "name": u("\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40")},
    "pl_PL": {"name_en": u("Polish"), "name": u("Polski")},
    "pt_BR": {"name_en": u("Portuguese (Brazil)"), "name": u("Portugu\xeas (Brasil)")},
    "pt_PT": {"name_en": u("Portuguese (Portugal)"), "name": u("Portugu\xeas (Portugal)")},
    "ro_RO": {"name_en": u("Romanian"), "name": u("Rom\xe2n\u0103")},
    "ru_RU": {"name_en": u("Russian"), "name": u("\u0420\u0443\u0441\u0441\u043a\u0438\u0439")},
    "sk_SK": {"name_en": u("Slovak"), "name": u("Sloven\u010dina")},
    "sl_SI": {"name_en": u("Slovenian"), "name": u("Sloven\u0161\u010dina")},
    "sq_AL": {"name_en": u("Albanian"), "name": u("Shqip")},
    "sr_RS": {"name_en": u("Serbian"), "name": u("\u0421\u0440\u043f\u0441\u043a\u0438")},
    "sv_SE": {"name_en": u("Swedish"), "name": u("Svenska")},
    "sw_KE": {"name_en": u("Swahili"), "name": u("Kiswahili")},
    "ta_IN": {"name_en": u("Tamil"), "name": u("\u0ba4\u0bae\u0bbf\u0bb4\u0bcd")},
    "te_IN": {"name_en": u("Telugu"), "name": u("\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41")},
    "th_TH": {"name_en": u("Thai"), "name": u("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22")},
    "tl_PH": {"name_en": u("Filipino"), "name": u("Filipino")},
    "tr_TR": {"name_en": u("Turkish"), "name": u("T\xfcrk\xe7e")},
    "uk_UA": {"name_en": u("Ukrainian"), "name": u("\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430")},
    "vi_VN": {"name_en": u("Vietnamese"), "name": u("Ti\u1ebfng Vi\u1ec7t")},
    "zh_CN": {"name_en": u("Chinese (Simplified)"), "name": u("\u4e2d\u6587(\u7b80\u4f53)")},
    "zh_TW": {"name_en": u("Chinese (Traditional)"), "name": u("\u4e2d\u6587(\u7e41\u9ad4)")},
}