format_nl.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2017 Mycroft AI Inc.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 
18 from mycroft.util.lang.format_common import convert_to_mixed_fraction
19 from math import floor
20 
# Dutch month names in calendar order, all lower case (callers compare
# against lower-cased words).
months = [
    'januari',
    'februari',
    'maart',
    'april',
    'mei',
    'juni',
    'juli',
    'augustus',
    'september',
    'oktober',
    'november',
    'december',
]
24 
# Dutch words for the basic numbers: 0-20, the tens up to 90, and 100.
# Larger and compound numbers are assembled from these entries.
NUM_STRING_NL = {
    0: 'nul',
    1: u'één',
    2: 'twee',
    3: 'drie',
    4: 'vier',
    5: 'vijf',
    6: 'zes',
    7: 'zeven',
    8: 'acht',
    9: 'negen',
    10: 'tien',
    11: 'elf',
    12: 'twaalf',
    13: 'dertien',
    14: 'veertien',
    15: 'vijftien',
    16: 'zestien',
    17: 'zeventien',
    18: 'achttien',  # was misspelled 'actien'
    19: 'negentien',
    20: 'twintig',
    30: 'dertig',
    40: 'veertig',
    50: 'vijftig',
    60: 'zestig',
    70: 'zeventig',
    80: 'tachtig',
    90: 'negentig',
    100: 'honderd',
}
56 
# Dutch uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but NUM_POWERS_OF_TEN can be extended to include additional number words
60 
61 
# Scale words indexed by the number of three-digit groups to the right:
# index 0 is the plain units group, index 1 thousands, index 2 millions, ...
NUM_POWERS_OF_TEN = [
    '',          # 10^0
    'duizend',   # 10^3
    'miljoen',   # 10^6
    'miljard',   # 10^9
    'biljoen',   # 10^12
    'biljard',   # 10^15
    'triljoen',  # 10^18
    'triljard',  # 10^21
]
66 
# Dutch names of the fraction denominators 2 through 20, keyed by the
# denominator itself (the list below starts at 2).
FRACTION_STRING_NL = dict(enumerate([
    'half',
    'derde',
    'vierde',
    'vijfde',
    'zesde',
    'zevende',
    'achtste',
    'negende',
    'tiende',
    'elfde',
    'twaalfde',
    'dertiende',
    'veertiende',
    'vijftiende',
    'zestiende',
    'zeventiende',
    'achttiende',
    'negentiende',
    'twintigste',
], 2))
88 
# In Dutch, numbers below 1 million are written as a single word, which can
# produce very long words. In some circumstances it may be better to
# separate the individual number words.
#   EXTRA_SPACE = " "  separates the words of numbers below 1 million
#                      (orthographically incorrect)
#   EXTRA_SPACE = ""   keeps the correct spelling; this is the standard
EXTRA_SPACE = ''
98 
99 
def nice_number_nl(number, speech, denominators):
    """ Dutch helper for nice_number

    Formats a number as a whole number plus a simple fraction. A value
    that convert_to_mixed_fraction splits into 4 and 1/2 becomes
    "4 en één half" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to try; passed straight
            on to convert_to_mixed_fraction
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: give up and show at most 3 decimals, with a
        # decimal comma instead of a point.
        return str(round(number, 3)).replace(".", ",")

    whole, numerator, denominator = mixed

    if numerator == 0:
        # A whole number reads the same for speech and display.
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, numerator, denominator)

    # Spoken form; the denominator must have an entry in FRACTION_STRING_NL.
    fraction_word = FRACTION_STRING_NL[denominator]
    if whole == 0:
        if numerator == 1:
            return u'één {}'.format(fraction_word)
        return '{} {}'.format(numerator, fraction_word)
    if numerator == 1:
        return u'{} en één {}'.format(whole, fraction_word)
    return '{} en {} {}'.format(whole, numerator, fraction_word)
136 
137 
def pronounce_number_nl(num, places=2):
    """
    Convert a number to its spoken Dutch equivalent

    For example, 5.2 becomes 'vijf komma twee nul': the fractional part is
    always spoken with exactly `places` digits, trailing zeros included.

    Args:
        num(float or int): the number to pronounce; if its absolute value
                           is 10^24 or more, str(num) is returned instead
        places(int): number of decimal places to speak
    Returns:
        (str): The pronounced number

    """

    def pronounce_triplet_nl(num):
        # Spell out one three-digit group (1..999) as a single word.
        result = ""
        hundreds, rest = divmod(floor(num), 100)
        if hundreds > 0:
            result += (NUM_STRING_NL[hundreds] + EXTRA_SPACE + 'honderd' +
                       EXTRA_SPACE)
        if 0 < rest <= 20:
            result += NUM_STRING_NL[rest]
        elif rest > 20:
            ones = rest % 10
            tens = rest - ones
            if ones > 0:
                # Dutch puts the ones first: "één en twintig"
                result += NUM_STRING_NL[ones] + EXTRA_SPACE
                result += 'en' + EXTRA_SPACE
            result += NUM_STRING_NL[tens] + EXTRA_SPACE
        return result

    def pronounce_fractional_nl(num, places):
        # Speak a fixed number of decimal digits, even trailing zeros.
        result = ""
        place = 10
        while places > 0:
            result += " " + NUM_STRING_NL[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_nl(num, scale_level=0):
        # Recursively spell out `num` one three-digit group at a time;
        # scale_level indexes NUM_POWERS_OF_TEN for the current group.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += u"één"
            elif scale_level == 1:
                result += u'één' + EXTRA_SPACE + 'duizend' + EXTRA_SPACE
            else:
                result += u"één " + NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_nl(last_triplet)
            if scale_level == 1:
                result += 'duizend' + EXTRA_SPACE
            if scale_level >= 2:
                result += " " + NUM_POWERS_OF_TEN[scale_level] + ' '

        # Floor division instead of floor(num / 1000): true division goes
        # through a float and loses digits for whole numbers above 2**53.
        return pronounce_whole_number_nl(num // 1000,
                                         scale_level + 1) + result

    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    if num == 0:
        return str(NUM_STRING_NL[0])
    if num < 0:
        return "min " + pronounce_number_nl(abs(num), places)
    if num == int(num):
        return pronounce_whole_number_nl(num)

    whole_number_part = floor(num)
    fractional_part = num - whole_number_part
    # A whole part of zero must be spoken as "nul"; the helper returns an
    # empty string for 0, which used to yield e.g. " komma vijf nul".
    result = pronounce_whole_number_nl(whole_number_part) or NUM_STRING_NL[0]
    if places > 0:
        result += " komma"
        result += pronounce_fractional_nl(fractional_part, places)
    return result
244 
245 
def pronounce_ordinal_nl(num):
    """
    Convert a whole, non-negative number to its Dutch ordinal word

    Ordinals up to 8 come from a fixed table; 9 to 19 take the suffix "de"
    and everything from 20 upwards the suffix "ste", appended to the
    result of pronounce_number_nl.

    Args:
        num(int or float): the number to convert
    Returns:
        (str): the ordinal, e.g. "derde" for 3. Negative or non-whole
               input is returned unchanged (not converted to a string).
    """
    ordinals = ["nulste", "eerste", "tweede", "derde", "vierde", "vijfde",
                "zesde", "zevende", "achtste"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num

    # Whole floats such as 3.0 pass the check above but cannot be used as
    # a list index, so normalise to int first.
    num = int(num)
    if num < len(ordinals):
        return ordinals[num]
    if num < 20:
        return pronounce_number_nl(num) + "de"
    return pronounce_number_nl(num) + "ste"
262 
263 
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Dutch)

    For example, generate 'half zes' for speech or '5:30' for text
    display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    # Build the display form first; it is the result when speech is False.
    if use_24hour:
        display = dt.strftime("%H:%M")  # e.g. "03:01" or "14:22"
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        display = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if display[0] == '0':
            display = display[1:]  # strip leading zeros

    if not speech:
        return display

    minute = dt.minute

    if use_24hour:
        spoken = pronounce_number_nl(dt.hour) + " uur"
        if minute != 0:
            # zero minutes are not pronounced: 13:00 is "13 uur"
            spoken += " " + pronounce_number_nl(minute)
        return spoken  # ampm is ignored when use_24hour is true

    if dt.hour == 0 and minute == 0:
        return "Middernacht"

    # 12-hour clock: times up to the half hour refer to the current hour,
    # "half" and the "voor" forms refer to the next one.
    this_hour = fix_hour(dt.hour % 12)
    next_hour = fix_hour(dt.hour % 12 + 1)

    if minute == 0:
        spoken = pronounce_number_nl(this_hour) + " uur"
    elif minute == 30:
        spoken = "half " + pronounce_number_nl(next_hour)
    elif minute == 15:
        spoken = "kwart over " + pronounce_number_nl(this_hour)
    elif minute == 45:
        spoken = "kwart voor " + pronounce_number_nl(next_hour)
    elif minute > 30:
        spoken = (pronounce_number_nl(60 - minute) + " voor " +
                  pronounce_number_nl(next_hour))
    else:
        spoken = (pronounce_number_nl(minute) + " over " +
                  pronounce_number_nl(this_hour))

    if use_ampm:
        spoken += nice_part_of_day_nl(dt)

    return spoken
342 
343 
def fix_hour(hour):
    """Map an hour onto the 12-hour clock face, returning 12 instead of 0.

    Args:
        hour (int): hour value; it is reduced modulo 12
    Returns:
        (int): a value from 1 to 12 for integer input
    """
    return hour % 12 or 12
349 
350 
def nice_part_of_day_nl(dt):
    """
    Return the Dutch part-of-day phrase for the hour of `dt`

    The phrase carries a leading space so it can be appended directly to a
    spoken time.

    Args:
        dt (datetime): the time to describe; only dt.hour is read
    Returns:
        (str): " 's nachts" (hours 0-5), " 's ochtends" (6-11),
               " 's middags" (12-17) or " 's avonds" (18-23)
    Raises:
        Exception: if dt.hour is not below 24
    """
    # (exclusive upper hour bound, phrase), checked in ascending order
    parts_of_day = (
        (6, " 's nachts"),
        (12, " 's ochtends"),
        (18, " 's middags"),
        (24, " 's avonds"),
    )
    for upper_bound, phrase in parts_of_day:
        if dt.hour < upper_bound:
            return phrase
    raise Exception('dt.hour is bigger than 24')
361 
362 
364  # check for months and call nice_ordinal_nl declension of ordinals
365  # replace "^" with "tot de macht" (to the power of)
366  words = text.split()
367 
368  for idx, word in enumerate(words):
369  if word.lower() in months:
370  text = nice_ordinal_nl(text)
371 
372  if word == '^':
373  wordNext = words[idx + 1] if idx + 1 < len(words) else ""
374  if wordNext.isnumeric():
375  words[idx] = "tot de macht"
376  text = " ".join(words)
377  return text
378 
379 
380 def nice_ordinal_nl(text):
381  # check for months for declension of ordinals before months
382  # depending on articles/prepositions
383  normalized_text = text
384  words = text.split()
385  for idx, word in enumerate(words):
386  wordNext = words[idx + 1] if idx + 1 < len(words) else ""
387  wordPrev = words[idx - 1] if idx > 0 else ""
388  if word[:-1].isdecimal():
389  if wordNext.lower() in months:
390  if wordPrev == 'de':
391  word = pronounce_ordinal_nl(int(word))
392  else:
393  word = pronounce_number_nl(int(word))
394  words[idx] = word
395  normalized_text = " ".join(words)
396  return normalized_text
def nice_number_nl(number, speech, denominators)
Definition: format_nl.py:100
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False)
Definition: format_nl.py:264
def pronounce_number_nl(num, places=2)
Definition: format_nl.py:138
def convert_to_mixed_fraction(number, denominators)


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40