parse_common.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2017 Mycroft AI Inc.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 
18 
def is_numeric(input_str):
    """
    Takes in a string and tests to see if it is a number.

    Args:
        input_str (str): string to test if a number
    Returns:
        (bool): True if float() can convert the value, else False.
                Values float() cannot accept at all (e.g. None or a
                list) are reported as not numeric instead of raising.

    """

    try:
        float(input_str)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number.
        # TypeError: an object float() has no conversion for (None, list...).
        return False
    return True
34 
35 
def look_for_fractions(split_list):
    """
    Decide whether the pieces obtained by splitting on '/' form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): True when there are exactly two pieces and both of them
                are numeric, otherwise False

    """

    # A fraction has exactly a numerator and a denominator.
    if len(split_list) != 2:
        return False

    return is_numeric(split_list[0]) and is_numeric(split_list[1])
52 
53 
def extract_numbers_generic(text, pronounce_handler, extract_handler,
                            short_scale=True, ordinals=False):
    """
    Takes in a string and extracts a list of numbers.
    Language agnostic, per language parsers need to be provided

    Args:
        text (str): the string to extract a number from
        pronounce_handler (function): function that pronounces a number
        extract_handler (function): function that extracts the last number
            present in a string
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: the values returned by extract_handler, in reverse order of
            extraction (i.e. in text order when the handler always yields
            the last number still present)
    """
    def replace_last(source, target, replacement):
        # handle duplicate occurrences, replace last one only
        return replacement.join(source.rsplit(target, 1))

    numbers = []
    to_parse = text
    extract = extract_handler(to_parse, short_scale, ordinals)
    # NOTE: any falsy handler result (False, None, 0) ends the extraction.
    while extract:
        numbers.append(extract)
        prev = to_parse
        num_txt = pronounce_handler(extract)
        # Digit form of the number, written the way an integer-valued
        # float would appear in text ("3" rather than "3.0").
        digits = str(extract)
        if digits.endswith(".0"):
            digits = digits[:-2]

        # last biggest number was replaced, recurse to handle cases like
        # test one two 3
        # First turn the spoken form into digits, then blank the digits so
        # the next pass sees the number before it.
        to_parse = replace_last(to_parse, num_txt, digits)
        to_parse = replace_last(to_parse, digits, " ")
        if to_parse == prev:
            # avoid infinite loops, occasionally pronounced number may be
            # different from extracted text,
            # ie pronounce(0.5) != half and extract(half) == 0.5
            # TODO fix this
            break
        extract = extract_handler(to_parse, short_scale, ordinals)
    numbers.reverse()
    return numbers
def extract_numbers_generic(text, pronounce_handler, extract_handler, short_scale=True, ordinals=False)
Definition: parse_common.py:55
def look_for_fractions(split_list)
Definition: parse_common.py:36


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40