17 from difflib
import SequenceMatcher
45 Log a warning when a language is unsupported 49 The language that was supplied. 50 supported_languages: [str] 51 The list of supported languages. 53 supported =
' '.join(supported_languages)
54 LOG.warning(
'Language "{language}" not recognized! Please make sure your ' 55 'language is one of the following: {supported}.' 56 .format(language=language, supported=supported))
60 """Perform a 'fuzzy' comparison between two strings. 62 float: match percentage -- 1.0 for perfect match, 63 down to 0.0 for no match at all. 65 return SequenceMatcher(
None, x, against).ratio()
70 Find best match from a list or dictionary given an input 74 choices: list or dictionary of choices 76 Returns: tuple with best match, score 78 if isinstance(choices, dict):
79 _choices = list(choices.keys())
80 elif isinstance(choices, list):
83 raise ValueError(
'a list or dict of choices must be provided')
85 best = (_choices[0],
fuzzy_match(query, _choices[0]))
86 for c
in _choices[1:]:
91 if isinstance(choices, dict):
92 return (choices[best[0]], best[1])
99 Takes in a string and extracts a list of numbers. 102 text (str): the string to extract a number from 103 short_scale (bool): Use "short scale" or "long scale" for large 104 numbers -- over a million. The default is short scale, which 105 is now common in most English speaking countries. 106 See https://en.wikipedia.org/wiki/Names_of_large_numbers 107 ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 108 lang (str): the BCP-47 code for the language to use, None uses default 110 list: list of extracted numbers as floats, or empty list if none found 113 if lang_code ==
"en":
115 elif lang_code ==
"de":
117 elif lang_code ==
"fr":
119 elif lang_code ==
"it":
121 elif lang_code ==
"da":
127 """Takes in a string and extracts a number. 130 text (str): the string to extract a number from 131 short_scale (bool): Use "short scale" or "long scale" for large 132 numbers -- over a million. The default is short scale, which 133 is now common in most English speaking countries. 134 See https://en.wikipedia.org/wiki/Names_of_large_numbers 135 ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 136 lang (str): the BCP-47 code for the language to use, None uses default 138 (int, float or False): The number extracted or False if the input 139 text contains no numbers 142 if lang_code ==
"en":
145 elif lang_code ==
"es":
147 elif lang_code ==
"pt":
149 elif lang_code ==
"it":
152 elif lang_code ==
"fr":
154 elif lang_code ==
"sv":
156 elif lang_code ==
"de":
158 elif lang_code ==
"da":
162 [
'en',
'es',
'pt',
'it',
'fr',
'sv',
'de',
'da'])
167 """ Convert an English phrase into a number of seconds 172 "3 days 8 hours 10 minutes and 49 seconds" 173 into an int, representing the total number of seconds. 175 The words used in the duration will be consumed, and 176 the remainder returned. 178 As an example, "set a timer for 5 minutes" would return 179 (300, "set a timer for"). 182 text (str): string containing a duration 183 lang (str): the BCP-47 code for the language to use, None uses default 187 A tuple containing the duration and the remaining text 188 not consumed in the parsing. The first value will 189 be None if no duration is found. The text returned 190 will have whitespace stripped from the ends. 194 if lang_code ==
"en":
204 Extracts date and time information from a sentence. Parses many of the 205 common ways that humans express dates and times, including relative dates 206 like "5 days from today", "tomorrow", and "Tuesday". 208 Vague terminology is given arbitrary values, like: 213 If a time isn't supplied or implied, the function defaults to 12 AM 216 text (str): the text to be interpreted 217 anchorDate (:obj:`datetime`, optional): the date to be used for 218 relative dating (for example, what does "tomorrow" mean?). 219 Defaults to the current local date/time. 220 lang (str): the BCP-47 code for the language to use, None uses default 221 default_time (datetime.time): time to use if none was found in 225 [:obj:`datetime`, :obj:`str`]: 'datetime' is the extracted date 226 as a datetime object in the user's local timezone. 227 'leftover_string' is the original phrase with all date and time 228 related keywords stripped out. See examples for further 231 Returns 'None' if no date or time related text is found. 235 >>> extract_datetime( 236 ... "What is the weather like the day after tomorrow?", 237 ... datetime(2017, 6, 30, 0, 0) 239 [datetime.datetime(2017, 7, 2, 0, 0), 'what is weather like'] 241 >>> extract_datetime( 242 ... "Set up an appointment 2 weeks from Sunday at 5 pm", 243 ... datetime(2016, 2, 19, 0, 0) 245 [datetime.datetime(2016, 3, 6, 17, 0), 'set up appointment'] 247 >>> extract_datetime( 248 ... "Set up an appointment", 249 ... datetime(2016, 2, 19, 0, 0) 259 if lang_code ==
"en":
261 elif lang_code ==
"es":
263 elif lang_code ==
"pt":
265 elif lang_code ==
"it":
267 elif lang_code ==
"fr":
269 elif lang_code ==
"sv":
271 elif lang_code ==
"de":
273 elif lang_code ==
"da":
277 [
'en',
'es',
'pt',
'it',
'fr',
'sv',
'de',
'da'])
282 """Prepare a string for parsing 284 This function prepares the given text for parsing by making 285 numbers consistent, getting rid of contractions, etc. 288 text (str): the string to normalize 289 lang (str): the BCP-47 code for the language to use, None uses default 290 remove_articles (bool): whether to remove articles (like 'a', or 291 'the'). True by default. 294 (str): The normalized string. 299 if lang_code ==
"en":
301 elif lang_code ==
"es":
303 elif lang_code ==
"pt":
305 elif lang_code ==
"it":
307 elif lang_code ==
"fr":
309 elif lang_code ==
"sv":
311 elif lang_code ==
"de":
313 elif lang_code ==
"da":
317 [
'en',
'es',
'pt',
'it',
'fr',
'sv',
'de',
'da'])
322 """ Guess the gender of a word 324 Some languages assign genders to specific words. This method will attempt 325 to determine the gender, optionally using the provided context sentence. 328 word (str): The word to look up 329 context (str, optional): String containing word, for context 330 lang (str): the BCP-47 code for the language to use, None uses default 333 str: The code "m" (male), "f" (female) or "n" (neutral) for the gender, 334 or None if unknown/or unused in the given language. 339 if lang_code
in [
"pt",
"es"]:
342 elif lang_code ==
"it":
def extractnumber_it(text, short_scale=False, ordinals=False)
def normalize_en(text, remove_articles)
def extract_number(text, short_scale=True, ordinals=False, lang=None)
def normalize_it(text, remove_articles)
def _log_unsupported_language(language, supported_languages)
def extract_datetime_fr(string, currentDate, default_time)
def extract_numbers_de(text, short_scale=True, ordinals=False)
def extract_datetime_en(string, dateNow, default_time)
def extract_datetime_it(string, dateNow, default_time)
def extractnumber_da(text)
def extractnumber_es(text)
def extractnumber_sv(text)
def match_one(query, choices)
def extractnumber_en(text, short_scale=True, ordinals=False)
def extract_datetime_da(string, currentDate, default_time)
def extract_datetime_es(input_str, currentDate=None, default_time=None)
def extract_numbers(text, short_scale=True, ordinals=False, lang=None)
def extract_numbers_en(text, short_scale=True, ordinals=False)
def normalize_de(text, remove_articles)
def get_gender_pt(word, raw_string="")
def extract_duration_en(text)
def extract_datetime_pt(input_str, currentDate, default_time)
def normalize_sv(text, remove_articles)
def extract_duration(text, lang=None)
def extract_datetime_sv(string, currentDate, default_time)
def extract_numbers_it(text, short_scale=False, ordinals=False)
def extract_datetime_de(string, currentDate, default_time)
def extractnumber_pt(text)
def normalize_pt(text, remove_articles)
def get_gender_it(word, raw_string="")
def get_primary_lang_code(lang=None)
def normalize_fr(text, remove_articles)
def extractnumber_de(text)
def get_gender(word, context="", lang=None)
def normalize_da(text, remove_articles)
def extract_numbers_fr(text, short_scale=True, ordinals=False)
def extractnumber_fr(text)
def extract_datetime(text, anchorDate=None, lang=None, default_time=None)
def normalize_es(text, remove_articles)
def fuzzy_match(x, against)
def extract_numbers_da(text, short_scale=True, ordinals=False)
def normalize(text, lang=None, remove_articles=True)