format_hu.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2017 Mycroft AI Inc.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 
18 from mycroft.util.lang.format_common import convert_to_mixed_fraction
19 from math import floor
20 
# Hungarian month names in calendar order (index 0 is January).
months = [
    'január',
    'február',
    'március',
    'április',
    'május',
    'június',
    'július',
    'augusztus',
    'szeptember',
    'október',
    'november',
    'december',
]
24 
# Hungarian words for the numbers that have their own word: 0-20, the
# round tens up to 90, and 100.
NUM_STRING_HU = {
    # units
    0: 'nulla', 1: 'egy', 2: 'kettő', 3: 'három', 4: 'négy',
    5: 'öt', 6: 'hat', 7: 'hét', 8: 'nyolc', 9: 'kilenc',
    # ten and the teens
    10: 'tíz', 11: 'tizenegy', 12: 'tizenkettő', 13: 'tizenhárom',
    14: 'tizennégy', 15: 'tizenöt', 16: 'tizenhat', 17: 'tizenhét',
    18: 'tizennyolc', 19: 'tizenkilenc',
    # round tens
    20: 'húsz', 30: 'harminc', 40: 'negyven', 50: 'ötven',
    60: 'hatvan', 70: 'hetven', 80: 'nyolcvan', 90: 'kilencven',
    # hundred
    100: 'száz',
}
56 
# Hungarian uses the "long scale" for naming large numbers, see
# https://en.wikipedia.org/wiki/Long_and_short_scales
# Entry i is the word for 10 ** (3 * i). pronounce_number_hu() spells out
# numbers below 1000000000000000000000000 (10 ** 24); append further
# scale words here to raise that limit.
NUM_POWERS_OF_TEN = [
    '',           # 10^0, the lowest triplet carries no scale word
    'ezer',       # 10^3
    'millió',     # 10^6
    'milliárd',   # 10^9
    'billió',     # 10^12
    'billiárd',   # 10^15
    'trillió',    # 10^18
    'trilliárd',  # 10^21
]
66 
# Hungarian names of the fractions 1/2 .. 1/20, keyed by denominator.
FRACTION_STRING_HU = {
    2: 'fél', 3: 'harmad', 4: 'negyed', 5: 'ötöd', 6: 'hatod',
    7: 'heted', 8: 'nyolcad', 9: 'kilenced', 10: 'tized',
    11: 'tizenegyed', 12: 'tizenketted', 13: 'tizenharmad',
    14: 'tizennegyed', 15: 'tizenötöd', 16: 'tizenhatod',
    17: 'tizenheted', 18: 'tizennyolcad', 19: 'tizenkilenced',
    20: 'huszad',
}
88 
# In Hungarian, numbers below two thousand are written as a single word;
# larger numbers are split into groups joined by hyphens.
# EXTRA_SPACE is the separator inserted between the parts of such a word:
#   ""   correct spelling (the standard setting)
#   " "  separates the individual number words; orthographically
#        incorrect, but it may be preferable in some circumstances
EXTRA_SPACE = ''
98 
99 
def _get_vocal_type(word):
    """Classify a word by the vowels it contains.

    Only lower-case vowels are recognised.

    Args:
        word (str): the word to inspect
    Returns:
        (int): 0 if the word has no high vowels (e é i í ö ő ü ű),
               1 if it has high vowels and no low vowels (a á o ó u ú),
               2 if it has both kinds
    """
    has_high = any(letter in 'eéiíöőüű' for letter in word)
    has_low = any(letter in 'aáoóuú' for letter in word)
    if has_high and has_low:
        return 2  # mixed
    if has_high:
        return 1  # high
    return 0  # low, or no recognised vowel at all
107 
108 
def nice_number_hu(number, speech, denominators):
    """ Hungarian helper for nice_number

    Formats a number as a human-friendly mixed fraction. The number is
    split by convert_to_mixed_fraction() into a whole part, a numerator
    and a denominator; e.g. a split of (4, 1, 2) becomes "4 és fél" for
    speech and "4 1/2" for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): candidate denominators, handed to
            convert_to_mixed_fraction(). Speech output looks the chosen
            denominator up in FRACTION_STRING_HU (2 .. 20).
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to at most 3 decimals, written
        # with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    if num == 0:
        # No fractional part; identical for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_HU[den]

    if whole == 0:
        if num != 1:
            return '{} {}'.format(num, den_str)
        # a lone half is just "fél", other unit fractions get "egy"
        prefix = '' if den == 2 else 'egy '
        return '{}{}'.format(prefix, den_str)

    if num != 1:
        return '{} egész {} {}'.format(whole, num, den_str)
    # "x és fél" for halves, "x egész egy <fraction>" otherwise
    joiner = 'és' if den == 2 else 'egész egy'
    return '{} {} {}'.format(whole, joiner, den_str)
152 
153 
def pronounce_number_hu(num, places=2):
    """
    Convert a number to its spoken Hungarian equivalent

    For example, 5.2 with places=1 returns 'öt egész két tized' and
    1.234 with the default places=2 returns
    'egy egész huszonhárom század'.

    Args:
        num(float or int): the number to pronounce; numbers whose
            absolute value is 10 ** 24 or more are not spelled out
        places(int): maximum decimal places to speak; the fraction is
            rounded to this many places, and with places <= 0 the
            fraction is dropped
    Returns:
        (str): The pronounced number, or str(num) when num is out of range
    """

    def pronounce_triplet_hu(num):
        # Spell a value in 0..999 as one word; 0 yields ''
        result = ""
        hundreds, num = divmod(floor(num), 100)
        if hundreds > 0:
            hundred_word = EXTRA_SPACE + 'száz' + EXTRA_SPACE
            if hundreds == 1:
                result += hundred_word  # plain "száz", no leading "egy"
            elif hundreds == 2:
                result += 'két' + hundred_word  # "két", not "kettő"
            else:
                result += NUM_STRING_HU[hundreds] + hundred_word
        if num == 0:
            pass  # nothing below the hundreds
        elif num <= 20:
            result += NUM_STRING_HU[num]
        else:
            ones = num % 10
            tens = num - ones
            # 21..29 use the combining form "huszon" instead of "húsz"
            if tens != 20:
                result += NUM_STRING_HU[tens] + EXTRA_SPACE
            else:
                result += "huszon" + EXTRA_SPACE
            if ones > 0:
                result += NUM_STRING_HU[ones] + EXTRA_SPACE
        return result

    def pronounce_whole_number_hu(num, scale_level=0):
        # Spell a non-negative whole number, lowest triplet last.
        # scale_level is the index into NUM_POWERS_OF_TEN of the triplet
        # currently being spelled. 0 yields ''.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "egy"
            elif scale_level == 1:
                # one thousand is just "ezer"
                result += EXTRA_SPACE + NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
            else:
                result += "egy" + NUM_POWERS_OF_TEN[scale_level]
        elif last_triplet > 1:
            result += pronounce_triplet_hu(last_triplet)
            if scale_level != 0:
                # in front of a scale word "kettő" becomes "két"
                result = result.replace(NUM_STRING_HU[2], 'két')
            if scale_level == 1:
                result += NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
            if scale_level >= 2:
                result += NUM_POWERS_OF_TEN[scale_level]
            if scale_level > 0:
                result += '-'

        # Integer floor division keeps every digit. Going through float
        # division (num / 1000) loses precision above 2 ** 53 and would
        # corrupt the higher triplets of large numbers.
        return pronounce_whole_number_hu(num // 1000,
                                         scale_level + 1) + result

    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    if num == 0:
        return str(NUM_STRING_HU[0])
    if num < 0:
        return "mínusz " + pronounce_number_hu(abs(num), places)
    if num == int(num):
        return pronounce_whole_number_hu(num).strip('-')

    whole_number_part = floor(num)
    fraction_value = 0
    if places > 0:
        fraction_value = round((num - whole_number_part) * 10 ** places)
        if fraction_value >= 10 ** places:
            # rounding carried over into the whole part (e.g. 5.999 -> 6)
            whole_number_part += 1
            fraction_value = 0

    # strip('-') drops the dangling group separator left behind when the
    # lowest triplet is zero (e.g. 2000 -> "kétezer-")
    result = pronounce_whole_number_hu(whole_number_part).strip('-')
    if not result:
        result = NUM_STRING_HU[0]

    if fraction_value > 0:
        fraction = pronounce_whole_number_hu(fraction_value).strip('-')
        result += " egész " + fraction.replace(NUM_STRING_HU[2], 'két')
        fraction_suffixes = [
            'tized', 'század', 'ezred', 'tízezred', 'százezred']
        # no suffix word is available beyond five decimal places
        if places <= len(fraction_suffixes):
            result += ' ' + fraction_suffixes[places - 1]
    return result
257 
258 
260  ordinals = ["nulladik", "első", "második", "harmadik", "negyedik",
261  "ötödik", "hatodik", "hetedik", "nyolcadik", "kilencedik",
262  "tizedik"]
263  big_ordinals = ["", "ezredik", "milliomodik"]
264 
265  # only for whole positive numbers including zero
266  if num < 0 or num != int(num):
267  return num
268  elif num < 11:
269  return ordinals[num]
270  else:
271  # concatenate parts and inflect them accordingly
272  root = pronounce_number_hu(num)
273  vtype = _get_vocal_type(root)
274  last_digit = num - floor(num/10) * 10
275  if root == "húsz":
276  root = "husz"
277  if num % 1000000 == 0:
278  return root.replace(NUM_POWERS_OF_TEN[2], big_ordinals[2])
279  if num % 1000 == 0:
280  return root.replace(NUM_POWERS_OF_TEN[1], big_ordinals[1])
281  if last_digit == 1:
282  return root + "edik"
283  elif root[-1] == 'ő':
284  return root[:-1] + 'edik'
285  elif last_digit != 0:
286  return ordinals[last_digit].join(
287  root.rsplit(NUM_STRING_HU[last_digit], 1))
288  return root + "edik" if vtype == 1 else root + "adik"
289 
290 
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'három óra egy' for speech or '3:01' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format; ignored
            when use_24hour is true
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        display = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        display = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if display[0] == '0':
            display = display[1:]  # strip leading zeros

    if not speech:
        return display

    # Generate a speakable version of the time
    if use_24hour:
        spoken_hour = dt.hour
    else:
        # midnight and noon have their own words
        if dt.minute == 0:
            if dt.hour == 0:
                return "éjfél"
            if dt.hour == 12:
                return "dél"
        # TODO: "half past 3", "a quarter of 4" and other idiomatic times
        spoken_hour = dt.hour % 12 or 12  # 0 -> 12, 13..23 -> 1..11

    # in front of "óra" the number two is "két", not "kettő"
    speak = pronounce_number_hu(spoken_hour).replace(NUM_STRING_HU[2], 'két')
    speak += " óra"
    if dt.minute != 0:  # zero minutes are not pronounced
        speak += " " + pronounce_number_hu(dt.minute)

    if use_ampm and not use_24hour:
        if dt.hour < 3:
            part_of_day = "éjjel "  # 00:01 - 02:59 éjjel/at night
        elif dt.hour < 12:
            part_of_day = "reggel "  # 03:00 - 11:59 reggel/in the morning
        elif dt.hour < 18:
            part_of_day = "délután "  # 12:01 - 17:59 délután/afternoon
        elif dt.hour < 22:
            part_of_day = "este "  # 18:00 - 21:59 este/evening
        else:
            part_of_day = "éjjel "  # 22:00 - 23:59 éjjel/at night
        speak = part_of_day + speak

    return speak
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False)
Definition: format_hu.py:291
def nice_number_hu(number, speech, denominators)
Definition: format_hu.py:109
def pronounce_number_hu(num, places=2)
Definition: format_hu.py:154
def convert_to_mixed_fraction(number, denominators)


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40