mycroft_ros: parse_da.py Source File

Go to the documentation of this file.
 # -*- coding: utf-8 -*-
 #
 # Copyright 2017 Mycroft AI Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 from datetime import datetime
 from dateutil.relativedelta import relativedelta
 from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
     extract_numbers_generic
 from mycroft.util.lang.format_da import pronounce_number_da
 
 da_numbers = {
     'nul': 0,
     'en': 1,
     'et': 1,
     'to': 2,
     'tre': 3,
     'fire': 4,
     'fem': 5,
     'seks': 6,
     'syv': 7,
     'otte': 8,
     'ni': 9,
     'ti': 10,
     'elve': 11,
     'tolv': 12,
     'tretten': 13,
     'fjorten': 14,
     'femten': 15,
     'seksten': 16,
     'sytten': 17,
     'atten': 18,
     'nitten': 19,
     'tyve': 20,
     'enogtyve': 21,
     'toogtyve': 22,
     'treogtyve': 23,
     'fireogtyve': 24,
     'femogtyve': 25,
     'seksogtyve': 26,
     'syvogtyve': 27,
     'otteogtyve': 28,
     'niogtyve': 29,
     'tredive': 30,
     'enogtredive': 31,
     'fyrrre': 40,
     'halvtres': 50,
     'tres': 60,
     'halvfjers': 70,
     'firs': 80,
     'halvfems': 90,
     'hunderede': 100,
     'tohundrede': 200,
     'trehundrede': 300,
     'firehundrede': 400,
     'femhundrede': 500,
     'sekshundrede': 600,
     'syvhundrede': 700,
     'ottehundrede': 800,
     'nihundrede': 900,
     'tusinde': 1000,
     'million': 1000000
 }
 
 
 def extractnumber_da(text):
     """
     This function prepares the given text for parsing by making
     numbers consistent, getting rid of contractions, etc.
     Args:
         text (str): the string to normalize
     Returns:
         (int) or (float): The value of extracted number
 
 
     undefined articles cannot be suppressed in German:
     'ein Pferd' means 'one horse' and 'a horse'
 
     """
     aWords = text.split()
     aWords = [word for word in aWords if
               word not in ["den", "det"]]
     and_pass = False
     valPreAnd = False
     val = False
     count = 0
     while count < len(aWords):
         word = aWords[count]
         if is_numeric(word):
             if word.isdigit():            # doesn't work with decimals
                 val = float(word)
         elif isFractional_da(word):
             val = isFractional_da(word)
         elif isOrdinal_da(word):
             val = isOrdinal_da(word)
         else:
             if word in da_numbers:
                 val = da_numbers[word]
                 if count < (len(aWords) - 1):
                     wordNext = aWords[count + 1]
                 else:
                     wordNext = ""
                 valNext = isFractional_da(wordNext)
 
                 if valNext:
                     val = val * valNext
                     aWords[count + 1] = ""
 
         if not val:
             # look for fractions like "2/3"
             aPieces = word.split('/')
             # if (len(aPieces) == 2 and is_numeric(aPieces[0])
             #   and is_numeric(aPieces[1])):
             if look_for_fractions(aPieces):
                 val = float(aPieces[0]) / float(aPieces[1])
             elif and_pass:
                 # added to value, quit here
                 val = valPreAnd
                 break
             else:
                 count += 1
                 continue
 
         aWords[count] = ""
 
         if and_pass:
             aWords[count - 1] = ''  # remove "og"
             val += valPreAnd
         elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
             and_pass = True
             valPreAnd = val
             val = False
             count += 2
             continue
         elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
             and_pass = True
             valPreAnd = val
             val = False
             count += 3
             continue
 
         break
 
     if not val:
         return False
 
     return val
 
 
 def extract_datetime_da(string, currentDate, default_time):
     def clean_string(s):
         """
             cleans the input string of unneeded punctuation
             and capitalization among other things.
 
             'am' is a preposition, so cannot currently be used
             for 12 hour date format
         """
 
         s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
             .replace(' den ', ' ').replace(' det ', ' ').replace(' om ',
                                                                  ' ').replace(
             ' om ', ' ') \
             .replace(' på ', ' ').replace(' om ', ' ')
         wordList = s.split()
 
         for idx, word in enumerate(wordList):
             if isOrdinal_da(word) is not False:
                 word = str(isOrdinal_da(word))
                 wordList[idx] = word
 
         return wordList
 
     def date_found():
         return found or \
                (
                        datestr != "" or timeStr != "" or
                        yearOffset != 0 or monthOffset != 0 or
                        dayOffset is True or hrOffset != 0 or
                        hrAbs or minOffset != 0 or
                        minAbs or secOffset != 0
                )
 
     if string == "" or not currentDate:
         return None
 
     found = False
     daySpecified = False
     dayOffset = False
     monthOffset = 0
     yearOffset = 0
     dateNow = currentDate
     today = dateNow.strftime("%w")
     currentYear = dateNow.strftime("%Y")
     fromFlag = False
     datestr = ""
     hasYear = False
     timeQualifier = ""
 
     timeQualifiersList = ['tidlig',
                           'morgen',
                           'morgenen',
                           'formidag',
                           'formiddagen',
                           'eftermiddag',
                           'eftermiddagen',
                           'aften',
                           'aftenen',
                           'nat',
                           'natten']
     markers = ['i', 'om', 'på', 'klokken', 'ved']
     days = ['mandag', 'tirsdag', 'onsdag',
             'torsdag', 'fredag', 'lørdag', 'søndag']
     months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
               'juli', 'august', 'september', 'oktober', 'november',
               'desember']
     monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
                    'sep', 'okt', 'nov', 'des']
 
     validFollowups = days + months + monthsShort
     validFollowups.append("i dag")
     validFollowups.append("morgen")
     validFollowups.append("næste")
     validFollowups.append("forige")
     validFollowups.append("nu")
 
     words = clean_string(string)
 
     for idx, word in enumerate(words):
         if word == "":
             continue
         wordPrevPrev = words[idx - 2] if idx > 1 else ""
         wordPrev = words[idx - 1] if idx > 0 else ""
         wordNext = words[idx + 1] if idx + 1 < len(words) else ""
         wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
 
         start = idx
         used = 0
         # save timequalifier for later
         if word in timeQualifiersList:
             timeQualifier = word
             # parse today, tomorrow, day after tomorrow
         elif word == "dag" and not fromFlag:
             dayOffset = 0
             used += 1
         elif word == "morgen" and not fromFlag and wordPrev != "om" and \
                 wordPrev not in days:  # morgen means tomorrow if not "am
             # Morgen" and not [day of the week] morgen
             dayOffset = 1
             used += 1
         elif word == "overmorgen" and not fromFlag:
             dayOffset = 2
             used += 1
             # parse 5 days, 10 weeks, last week, next week
         elif word == "dag" or word == "dage":
             if wordPrev[0].isdigit():
                 dayOffset += int(wordPrev)
                 start -= 1
                 used = 2
         elif word == "uge" or word == "uger" and not fromFlag:
             if wordPrev[0].isdigit():
                 dayOffset += int(wordPrev) * 7
                 start -= 1
                 used = 2
             elif wordPrev[:6] == "næste":
                 dayOffset = 7
                 start -= 1
                 used = 2
             elif wordPrev[:5] == "forige":
                 dayOffset = -7
                 start -= 1
                 used = 2
                 # parse 10 months, next month, last month
         elif word == "måned" and not fromFlag:
             if wordPrev[0].isdigit():
                 monthOffset = int(wordPrev)
                 start -= 1
                 used = 2
             elif wordPrev[:6] == "næste":
                 monthOffset = 1
                 start -= 1
                 used = 2
             elif wordPrev[:5] == "forige":
                 monthOffset = -1
                 start -= 1
                 used = 2
                 # parse 5 years, next year, last year
         elif word == "år" and not fromFlag:
             if wordPrev[0].isdigit():
                 yearOffset = int(wordPrev)
                 start -= 1
                 used = 2
             elif wordPrev[:6] == " næste":
                 yearOffset = 1
                 start -= 1
                 used = 2
             elif wordPrev[:6] == "næste":
                 yearOffset = -1
                 start -= 1
                 used = 2
                 # parse Monday, Tuesday, etc., and next Monday,
                 # last Tuesday, etc.
         elif word in days and not fromFlag:
             d = days.index(word)
             dayOffset = (d + 1) - int(today)
             used = 1
             if dayOffset < 0:
                 dayOffset += 7
             if wordNext == "morgen":
                 # morgen means morning if preceded by
                 # the day of the week
                 words[idx + 1] = "tidlig"
             if wordPrev[:6] == "næste":
                 dayOffset += 7
                 used += 1
                 start -= 1
             elif wordPrev[:5] == "forige":
                 dayOffset -= 7
                 used += 1
                 start -= 1
                 # parse 15 of July, June 20th, Feb 18, 19 of February
         elif word in months or word in monthsShort and not fromFlag:
             try:
                 m = months.index(word)
             except ValueError:
                 m = monthsShort.index(word)
             used += 1
             datestr = months[m]
             if wordPrev and (wordPrev[0].isdigit() or
                              (wordPrev == "of" and wordPrevPrev[0].isdigit())):
                 if wordPrev == "of" and wordPrevPrev[0].isdigit():
                     datestr += " " + words[idx - 2]
                     used += 1
                     start -= 1
                 else:
                     datestr += " " + wordPrev
                 start -= 1
                 used += 1
                 if wordNext and wordNext[0].isdigit():
                     datestr += " " + wordNext
                     used += 1
                     hasYear = True
                 else:
                     hasYear = False
 
             elif wordNext and wordNext[0].isdigit():
                 datestr += " " + wordNext
                 used += 1
                 if wordNextNext and wordNextNext[0].isdigit():
                     datestr += " " + wordNextNext
                     used += 1
                     hasYear = True
                 else:
                     hasYear = False
         # parse 5 days from tomorrow, 10 weeks from next thursday,
         # 2 months from July
 
         if (
                 word == "fra" or word == "til" or word == "om") and wordNext \
                 in validFollowups:
             used = 2
             fromFlag = True
             if wordNext == "morgenen" and \
                     wordPrev != "om" and \
                     wordPrev not in days:
                 # morgen means tomorrow if not "am Morgen" and not
                 # [day of the week] morgen:
                 dayOffset += 1
             elif wordNext in days:
                 d = days.index(wordNext)
                 tmpOffset = (d + 1) - int(today)
                 used = 2
                 if tmpOffset < 0:
                     tmpOffset += 7
                 dayOffset += tmpOffset
             elif wordNextNext and wordNextNext in days:
                 d = days.index(wordNextNext)
                 tmpOffset = (d + 1) - int(today)
                 used = 3
                 if wordNext[:6] == "næste":
                     tmpOffset += 7
                     used += 1
                     start -= 1
                 elif wordNext[:5] == "forige":
                     tmpOffset -= 7
                     used += 1
                     start -= 1
                 dayOffset += tmpOffset
         if used > 0:
             if start - 1 > 0 and words[start - 1].startswith("denne"):
                 start -= 1
                 used += 1
 
             for i in range(0, used):
                 words[i + start] = ""
 
             if start - 1 >= 0 and words[start - 1] in markers:
                 words[start - 1] = ""
             found = True
             daySpecified = True
 
     # parse time
     timeStr = ""
     hrOffset = 0
     minOffset = 0
     secOffset = 0
     hrAbs = None
     minAbs = None
 
     for idx, word in enumerate(words):
         if word == "":
             continue
 
         wordPrevPrev = words[idx - 2] if idx > 1 else ""
         wordPrev = words[idx - 1] if idx > 0 else ""
         wordNext = words[idx + 1] if idx + 1 < len(words) else ""
         wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
         wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
         wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
 
         # parse noon, midnight, morning, afternoon, evening
         used = 0
         if word[:6] == "middag":
             hrAbs = 12
             used += 1
         elif word[:11] == "midnat":
             hrAbs = 0
             used += 1
         elif word == "morgenen" or (
                 wordPrev == "om" and word == "morgenen") or word == "tidlig":
             if not hrAbs:
                 hrAbs = 8
             used += 1
         elif word[:11] == "eftermiddag":
             if not hrAbs:
                 hrAbs = 15
             used += 1
         elif word[:5] == "aften":
             if not hrAbs:
                 hrAbs = 19
             used += 1
             # parse half an hour, quarter hour
         elif word == "time" and \
                 (wordPrev in markers or wordPrevPrev in markers):
             if wordPrev[:4] == "halv":
                 minOffset = 30
             elif wordPrev == "kvarter":
                 minOffset = 15
             elif wordPrev == "trekvarter":
                 minOffset = 45
             else:
                 hrOffset = 1
             if wordPrevPrev in markers:
                 words[idx - 2] = ""
             words[idx - 1] = ""
             used += 1
             hrAbs = -1
             minAbs = -1
             # parse 5:00 am, 12:00 p.m., etc
         elif word[0].isdigit():
             isTime = True
             strHH = ""
             strMM = ""
             remainder = ""
             if ':' in word:
                 # parse colons
                 # "3:00 in the morning"
                 stage = 0
                 length = len(word)
                 for i in range(length):
                     if stage == 0:
                         if word[i].isdigit():
                             strHH += word[i]
                         elif word[i] == ":":
                             stage = 1
                         else:
                             stage = 2
                             i -= 1
                     elif stage == 1:
                         if word[i].isdigit():
                             strMM += word[i]
                         else:
                             stage = 2
                             i -= 1
                     elif stage == 2:
                         remainder = word[i:].replace(".", "")
                         break
                 if remainder == "":
                     nextWord = wordNext.replace(".", "")
                     if nextWord == "am" or nextWord == "pm":
                         remainder = nextWord
                         used += 1
                     elif nextWord == "aften":
                         remainder = "pm"
                         used += 1
                     elif wordNext == "om" and wordNextNext == "morgenen":
                         remainder = "am"
                         used += 2
                     elif wordNext == "om" and wordNextNext == "eftermiddagen":
                         remainder = "pm"
                         used += 2
                     elif wordNext == "om" and wordNextNext == "aftenen":
                         remainder = "pm"
                         used += 2
                     elif wordNext == "morgen":
                         remainder = "am"
                         used += 1
                     elif wordNext == "eftermiddag":
                         remainder = "pm"
                         used += 1
                     elif wordNext == "aften":
                         remainder = "pm"
                         used += 1
                     elif wordNext == "i" and wordNextNext == "morgen":
                         remainder = "am"
                         used = 2
                     elif wordNext == "i" and wordNextNext == "eftermiddag":
                         remainder = "pm"
                         used = 2
                     elif wordNext == "i" and wordNextNext == "aften":
                         remainder = "pm"
                         used = 2
                     elif wordNext == "natten":
                         if strHH > 4:
                             remainder = "pm"
                         else:
                             remainder = "am"
                         used += 1
                     else:
                         if timeQualifier != "":
                             if strHH <= 12 and \
                                     (timeQualifier == "aftenen" or
                                      timeQualifier == "eftermiddagen"):
                                 strHH += 12  # what happens when strHH is 24?
             else:
                 # try to parse # s without colons
                 # 5 hours, 10 minutes etc.
                 length = len(word)
                 strNum = ""
                 remainder = ""
                 for i in range(length):
                     if word[i].isdigit():
                         strNum += word[i]
                     else:
                         remainder += word[i]
 
                 if remainder == "":
                     remainder = wordNext.replace(".", "").lstrip().rstrip()
 
                 if (
                         remainder == "pm" or
                         wordNext == "pm" or
                         remainder == "p.m." or
                         wordNext == "p.m."):
                     strHH = strNum
                     remainder = "pm"
                     used = 1
                 elif (
                         remainder == "am" or
                         wordNext == "am" or
                         remainder == "a.m." or
                         wordNext == "a.m."):
                     strHH = strNum
                     remainder = "am"
                     used = 1
                 else:
                     if wordNext == "time" and int(word) < 100:
                         # "in 3 hours"
                         hrOffset = int(word)
                         used = 2
                         isTime = False
                         hrAbs = -1
                         minAbs = -1
                     elif wordNext == "minut":
                         # "in 10 minutes"
                         minOffset = int(word)
                         used = 2
                         isTime = False
                         hrAbs = -1
                         minAbs = -1
                     elif wordNext == "sekund":
                         # in 5 seconds
                         secOffset = int(word)
                         used = 2
                         isTime = False
                         hrAbs = -1
                         minAbs = -1
 
                     elif wordNext == "time":
                         strHH = word
                         used += 1
                         isTime = True
                         if wordNextNext == timeQualifier:
                             strMM = ""
                             if wordNextNext[:11] == "eftermiddag":
                                 used += 1
                                 remainder = "pm"
                             elif wordNextNext == "om" and wordNextNextNext == \
                                     "eftermiddagen":
                                 used += 2
                                 remainder = "pm"
                             elif wordNextNext[:5] == "aften":
                                 used += 1
                                 remainder = "pm"
                             elif wordNextNext == "om" and wordNextNextNext == \
                                     "aftenen":
                                 used += 2
                                 remainder = "pm"
                             elif wordNextNext[:6] == "morgen":
                                 used += 1
                                 remainder = "am"
                             elif wordNextNext == "om" and wordNextNextNext == \
                                     "morgenen":
                                 used += 2
                                 remainder = "am"
                             elif wordNextNext == "natten":
                                 used += 1
                                 if 8 <= int(word) <= 12:
                                     remainder = "pm"
                                 else:
                                     remainder = "am"
 
                         elif is_numeric(wordNextNext):
                             strMM = wordNextNext
                             used += 1
                             if wordNextNextNext == timeQualifier:
                                 if wordNextNextNext[:11] == "eftermiddag":
                                     used += 1
                                     remainder = "pm"
                                 elif wordNextNextNext == "om" and \
                                         wordNextNextNextNext == \
                                         "eftermiddagen":
                                     used += 2
                                     remainder = "pm"
                                 elif wordNextNextNext[:6] == "natten":
                                     used += 1
                                     remainder = "pm"
                                 elif wordNextNextNext == "am" and \
                                         wordNextNextNextNext == "natten":
                                     used += 2
                                     remainder = "pm"
                                 elif wordNextNextNext[:7] == "morgenen":
                                     used += 1
                                     remainder = "am"
                                 elif wordNextNextNext == "om" and \
                                         wordNextNextNextNext == "morgenen":
                                     used += 2
                                     remainder = "am"
                                 elif wordNextNextNext == "natten":
                                     used += 1
                                     if 8 <= int(word) <= 12:
                                         remainder = "pm"
                                     else:
                                         remainder = "am"
 
                     elif wordNext == timeQualifier:
                         strHH = word
                         strMM = 00
                         isTime = True
                         if wordNext[:10] == "eftermidag":
                             used += 1
                             remainder = "pm"
                         elif wordNext == "om" and \
                                 wordNextNext == "eftermiddanen":
                             used += 2
                             remainder = "pm"
                         elif wordNext[:7] == "aftenen":
                             used += 1
                             remainder = "pm"
                         elif wordNext == "om" and wordNextNext == "aftenen":
                             used += 2
                             remainder = "pm"
                         elif wordNext[:7] == "morgenen":
                             used += 1
                             remainder = "am"
                         elif wordNext == "ao" and wordNextNext == "morgenen":
                             used += 2
                             remainder = "am"
                         elif wordNext == "natten":
                             used += 1
                             if 8 <= int(word) <= 12:
                                 remainder = "pm"
                             else:
                                 remainder = "am"
 
                 # if timeQualifier != "":
                 #     military = True
                 # else:
                 #     isTime = False
 
             strHH = int(strHH) if strHH else 0
             strMM = int(strMM) if strMM else 0
             strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
             strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
             if strHH > 24 or strMM > 59:
                 isTime = False
                 used = 0
             if isTime:
                 hrAbs = strHH * 1
                 minAbs = strMM * 1
                 used += 1
         if used > 0:
             # removed parsed words from the sentence
             for i in range(used):
                 words[idx + i] = ""
 
             if wordPrev == "tidlig":
                 hrOffset = -1
                 words[idx - 1] = ""
                 idx -= 1
             elif wordPrev == "sen":
                 hrOffset = 1
                 words[idx - 1] = ""
                 idx -= 1
             if idx > 0 and wordPrev in markers:
                 words[idx - 1] = ""
             if idx > 1 and wordPrevPrev in markers:
                 words[idx - 2] = ""
 
             idx += used - 1
             found = True
 
     # check that we found a date
     if not date_found:
         return None
 
     if dayOffset is False:
         dayOffset = 0
 
     # perform date manipulation
 
     extractedDate = dateNow
     extractedDate = extractedDate.replace(microsecond=0,
                                           second=0,
                                           minute=0,
                                           hour=0)
     if datestr != "":
         en_months = ['january', 'february', 'march', 'april', 'may', 'june',
                      'july', 'august', 'september', 'october', 'november',
                      'december']
         en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
                           'aug',
                           'sept', 'oct', 'nov', 'dec']
         for idx, en_month in enumerate(en_months):
             datestr = datestr.replace(months[idx], en_month)
         for idx, en_month in enumerate(en_monthsShort):
             datestr = datestr.replace(monthsShort[idx], en_month)
 
         temp = datetime.strptime(datestr, "%B %d")
         if not hasYear:
             temp = temp.replace(year=extractedDate.year)
             if extractedDate < temp:
                 extractedDate = extractedDate.replace(year=int(currentYear),
                                                       month=int(
                                                           temp.strftime(
                                                               "%m")),
                                                       day=int(temp.strftime(
                                                           "%d")))
             else:
                 extractedDate = extractedDate.replace(
                     year=int(currentYear) + 1,
                     month=int(temp.strftime("%m")),
                     day=int(temp.strftime("%d")))
         else:
             extractedDate = extractedDate.replace(
                 year=int(temp.strftime("%Y")),
                 month=int(temp.strftime("%m")),
                 day=int(temp.strftime("%d")))
 
     if timeStr != "":
         temp = datetime(timeStr)
         extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
                                               minute=temp.strftime("%M"),
                                               second=temp.strftime("%S"))
 
     if yearOffset != 0:
         extractedDate = extractedDate + relativedelta(years=yearOffset)
     if monthOffset != 0:
         extractedDate = extractedDate + relativedelta(months=monthOffset)
     if dayOffset != 0:
         extractedDate = extractedDate + relativedelta(days=dayOffset)
 
     if hrAbs is None and minAbs is None and default_time:
         hrAbs = default_time.hour
         minAbs = default_time.minute
 
     if hrAbs != -1 and minAbs != -1:
 
         extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                       minutes=minAbs or 0)
         if (hrAbs or minAbs) and datestr == "":
             if not daySpecified and dateNow > extractedDate:
                 extractedDate = extractedDate + relativedelta(days=1)
     if hrOffset != 0:
         extractedDate = extractedDate + relativedelta(hours=hrOffset)
     if minOffset != 0:
         extractedDate = extractedDate + relativedelta(minutes=minOffset)
     if secOffset != 0:
         extractedDate = extractedDate + relativedelta(seconds=secOffset)
     for idx, word in enumerate(words):
         if words[idx] == "og" and words[idx - 1] == "" \
                 and words[idx + 1] == "":
             words[idx] = ""
 
     resultStr = " ".join(words)
     resultStr = ' '.join(resultStr.split())
 
     return [extractedDate, resultStr]
 
 
 def isFractional_da(input_str):
     """
     This function takes the given text and checks if it is a fraction.
 
     Args:
         input_str (str): the string to check if fractional
     Returns:
         (bool) or (float): False if not a fraction, otherwise the fraction
 
     """
     if input_str.lower().startswith("halv"):
         return 0.5
 
     if input_str.lower() == "trediedel":
         return 1.0 / 3
     elif input_str.endswith('del'):
         input_str = input_str[:len(input_str) - 3]  # e.g. "fünftel"
         if input_str.lower() in da_numbers:
             return 1.0 / (da_numbers[input_str.lower()])
 
     return False
 
 
 def isOrdinal_da(input_str):
     """
     This function takes the given text and checks if it is an ordinal number.
 
     Args:
         input_str (str): the string to check if ordinal
     Returns:
         (bool) or (float): False if not an ordinal, otherwise the number
         corresponding to the ordinal
 
     ordinals for 1, 3, 7 and 8 are irregular
 
     only works for ordinals corresponding to the numbers in da_numbers
 
     """
 
     lowerstr = input_str.lower()
 
     if lowerstr.startswith("første"):
         return 1
     if lowerstr.startswith("anden"):
         return 2
     if lowerstr.startswith("tredie"):
         return 3
     if lowerstr.startswith("fjerde"):
         return 4
     if lowerstr.startswith("femte"):
         return 5
     if lowerstr.startswith("sjette"):
         return 6
     if lowerstr.startswith("elfte"):
         return 1
     if lowerstr.startswith("tolvfte"):
         return 12
 
     if lowerstr[-3:] == "nde":
         # from 20 suffix is -ste*
         lowerstr = lowerstr[:-3]
         if lowerstr in da_numbers:
             return da_numbers[lowerstr]
 
     if lowerstr[-4:] in ["ende"]:
         lowerstr = lowerstr[:-4]
         if lowerstr in da_numbers:
             return da_numbers[lowerstr]
 
     if lowerstr[-2:] == "te":  # below 20 suffix is -te*
         lowerstr = lowerstr[:-2]
         if lowerstr in da_numbers:
             return da_numbers[lowerstr]
 
     return False
 
 
 def normalize_da(text, remove_articles):
     """ German string normalization """
 
     words = text.split()  # this also removed extra spaces
     normalized = ""
     for word in words:
         if remove_articles and word in ["den", "det"]:
             continue
 
         # Convert numbers into digits, e.g. "two" -> "2"
 
         if word in da_numbers:
             word = str(da_numbers[word])
 
         normalized += " " + word
 
     return normalized[1:]  # strip the initial space
 
 
 def extract_numbers_da(text, short_scale=True, ordinals=False):
     """
         Takes in a string and extracts a list of numbers.
 
     Args:
         text (str): the string to extract a number from
         short_scale (bool): Use "short scale" or "long scale" for large
             numbers -- over a million.  The default is short scale, which
             is now common in most English speaking countries.
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
     Returns:
         list: list of extracted numbers as floats
     """
     return extract_numbers_generic(text, pronounce_number_da, extractnumber_da,
                                    short_scale=short_scale, ordinals=ordinals)