parsedate.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *                                  _   _ ____  _
00003  *  Project                     ___| | | |  _ \| |
00004  *                             / __| | | | |_) | |
00005  *                            | (__| |_| |  _ <| |___
00006  *                             \___|\___/|_| \_\_____|
00007  *
00008  * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel@haxx.se>, et al.
00009  *
00010  * This software is licensed as described in the file COPYING, which
00011  * you should have received as part of this distribution. The terms
00012  * are also available at https://curl.haxx.se/docs/copyright.html.
00013  *
00014  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
00015  * copies of the Software, and permit persons to whom the Software is
00016  * furnished to do so, under the terms of the COPYING file.
00017  *
00018  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
00019  * KIND, either express or implied.
00020  *
00021  ***************************************************************************/
00022 /*
00023   A brief summary of the date string formats this parser groks:
00024 
00025   RFC 2616 3.3.1
00026 
00027   Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
00028   Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
00029   Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
00030 
00031   we support dates without week day name:
00032 
00033   06 Nov 1994 08:49:37 GMT
00034   06-Nov-94 08:49:37 GMT
00035   Nov  6 08:49:37 1994
00036 
00037   without the time zone:
00038 
00039   06 Nov 1994 08:49:37
00040   06-Nov-94 08:49:37
00041 
00042   weird order:
00043 
00044   1994 Nov 6 08:49:37  (GNU date fails)
00045   GMT 08:49:37 06-Nov-94 Sunday
00046   94 6 Nov 08:49:37    (GNU date fails)
00047 
00048   time left out:
00049 
00050   1994 Nov 6
00051   06-Nov-94
00052   Sun Nov 6 94
00053 
00054   unusual separators:
00055 
00056   1994.Nov.6
00057   Sun/Nov/6/94/GMT
00058 
00059   commonly used time zone names:
00060 
00061   Sun, 06 Nov 1994 08:49:37 CET
00062   06 Nov 1994 08:49:37 EST
00063 
00064   time zones specified using RFC822 style:
00065 
00066   Sun, 12 Sep 2004 15:05:58 -0700
00067   Sat, 11 Sep 2004 21:32:11 +0200
00068 
00069   compact numerical date strings:
00070 
00071   20040912 15:05:58 -0700
00072   20040911 +0200
00073 
00074 */
00075 
00076 #include "curl_setup.h"
00077 
00078 #ifdef HAVE_LIMITS_H
00079 #include <limits.h>
00080 #endif
00081 
00082 #include <curl/curl.h>
00083 #include "strcase.h"
00084 #include "warnless.h"
00085 #include "parsedate.h"
00086 
/* Abbreviated English weekday names, Monday first. */
const char * const Curl_wkday[] = {
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat",
  "Sun"
};

/* Full English weekday names, in the same Monday-first order. */
static const char * const weekday[] = {
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday"
};

/* Abbreviated English month names, January first. */
const char * const Curl_month[] = {
  "Jan",
  "Feb",
  "Mar",
  "Apr",
  "May",
  "Jun",
  "Jul",
  "Aug",
  "Sep",
  "Oct",
  "Nov",
  "Dec"
};
00095 
/* One entry of the time zone name table below. */
struct tzinfo {
  char name[5]; /* zone name: up to four characters plus the terminator */
  int offset;   /* +/- in minutes; added to the parsed time to reach GMT */
};

/*
 * parsedate()
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

static int parsedate(const char *date, time_t *output);

/* Negative values are parenthesized so the macros stay a single operand
   wherever they are expanded. */
#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   (-1)
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2

/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser. */
#define tDAYZONE (-60)     /* offset for daylight savings time, in minutes;
                              added to the standard-time offset */
static const struct tzinfo tz[]= {
  {"GMT", 0},                /* Greenwich Mean */
  {"UTC", 0},                /* Universal (Coordinated) */
  {"WET", 0},                /* Western European */
  {"BST", 0 + tDAYZONE},     /* British Summer */
  {"WAT", 60},               /* West Africa */
  {"AST", 240},              /* Atlantic Standard */
  {"ADT", 240 + tDAYZONE},   /* Atlantic Daylight */
  {"EST", 300},              /* Eastern Standard */
  {"EDT", 300 + tDAYZONE},   /* Eastern Daylight */
  {"CST", 360},              /* Central Standard */
  {"CDT", 360 + tDAYZONE},   /* Central Daylight */
  {"MST", 420},              /* Mountain Standard */
  {"MDT", 420 + tDAYZONE},   /* Mountain Daylight */
  {"PST", 480},              /* Pacific Standard */
  {"PDT", 480 + tDAYZONE},   /* Pacific Daylight */
  {"YST", 540},              /* Yukon Standard */
  {"YDT", 540 + tDAYZONE},   /* Yukon Daylight */
  {"HST", 600},              /* Hawaii Standard */
  {"HDT", 600 + tDAYZONE},   /* Hawaii Daylight */
  {"CAT", 600},              /* Central Alaska */
  {"AHST", 600},             /* Alaska-Hawaii Standard */
  {"NT",  660},              /* Nome */
  {"IDLW", 720},             /* International Date Line West */
  {"CET", -60},              /* Central European */
  {"MET", -60},              /* Middle European */
  {"MEWT", -60},             /* Middle European Winter */
  {"MEST", -60 + tDAYZONE},  /* Middle European Summer */
  {"CEST", -60 + tDAYZONE},  /* Central European Summer */
  {"MESZ", -60 + tDAYZONE},  /* Middle European Summer */
  {"FWT", -60},              /* French Winter */
  {"FST", -60 + tDAYZONE},   /* French Summer */
  {"EET", -120},             /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},            /* West Australian Standard */
  {"WADT", -420 + tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480},             /* China Coast, USSR Zone 7 */
  {"JST", -540},             /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},            /* Eastern Australian Standard */
  {"EADT", -600 + tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600},             /* Guam Standard, USSR Zone 9 */
  {"NZT", -720},             /* New Zealand */
  {"NZST", -720},            /* New Zealand Standard */
  {"NZDT", -720 + tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720},            /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.
   */
  {"A",  +1 * 60},         /* Alpha */
  {"B",  +2 * 60},         /* Bravo */
  {"C",  +3 * 60},         /* Charlie */
  {"D",  +4 * 60},         /* Delta */
  {"E",  +5 * 60},         /* Echo */
  {"F",  +6 * 60},         /* Foxtrot */
  {"G",  +7 * 60},         /* Golf */
  {"H",  +8 * 60},         /* Hotel */
  {"I",  +9 * 60},         /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", +10 * 60},         /* Kilo */
  {"L", +11 * 60},         /* Lima */
  {"M", +12 * 60},         /* Mike */
  {"N",  -1 * 60},         /* November */
  {"O",  -2 * 60},         /* Oscar */
  {"P",  -3 * 60},         /* Papa */
  {"Q",  -4 * 60},         /* Quebec */
  {"R",  -5 * 60},         /* Romeo */
  {"S",  -6 * 60},         /* Sierra */
  {"T",  -7 * 60},         /* Tango */
  {"U",  -8 * 60},         /* Uniform */
  {"V",  -9 * 60},         /* Victor */
  {"W", -10 * 60},         /* Whiskey */
  {"X", -11 * 60},         /* X-ray */
  {"Y", -12 * 60},         /* Yankee */
  {"Z", 0},                /* Zulu, zero meridian, a.k.a. UTC */
};
00198 
00199 /* returns:
00200    -1 no day
00201    0 monday - 6 sunday
00202 */
00203 
00204 static int checkday(const char *check, size_t len)
00205 {
00206   int i;
00207   const char * const *what;
00208   bool found= FALSE;
00209   if(len > 3)
00210     what = &weekday[0];
00211   else
00212     what = &Curl_wkday[0];
00213   for(i=0; i<7; i++) {
00214     if(strcasecompare(check, what[0])) {
00215       found=TRUE;
00216       break;
00217     }
00218     what++;
00219   }
00220   return found?i:-1;
00221 }
00222 
00223 static int checkmonth(const char *check)
00224 {
00225   int i;
00226   const char * const *what;
00227   bool found= FALSE;
00228 
00229   what = &Curl_month[0];
00230   for(i=0; i<12; i++) {
00231     if(strcasecompare(check, what[0])) {
00232       found=TRUE;
00233       break;
00234     }
00235     what++;
00236   }
00237   return found?i:-1; /* return the offset or -1, no real offset is -1 */
00238 }
00239 
00240 /* return the time zone offset between GMT and the input one, in number
00241    of seconds or -1 if the timezone wasn't found/legal */
00242 
00243 static int checktz(const char *check)
00244 {
00245   unsigned int i;
00246   const struct tzinfo *what;
00247   bool found= FALSE;
00248 
00249   what = tz;
00250   for(i=0; i< sizeof(tz)/sizeof(tz[0]); i++) {
00251     if(strcasecompare(check, what->name)) {
00252       found=TRUE;
00253       break;
00254     }
00255     what++;
00256   }
00257   return found?what->offset*60:-1;
00258 }
00259 
/*
 * skip()
 *
 * Advances *date past every character that is neither a letter nor a
 * digit, stopping at the first alphanumeric character or at the
 * terminating zero.
 */
static void skip(const char **date)
{
  const char *cursor = *date;

  while(*cursor && !ISALNUM(*cursor))
    cursor++;

  *date = cursor;
}
00266 
/* What the parser assumes the next bare number in the date string to be. */
enum assume {
  DATE_MDAY = 0, /* a day of the month */
  DATE_YEAR = 1, /* a year */
  DATE_TIME = 2  /* a time; parsedate() never assigns this value */
};
00272 
/* this is a clone of 'struct tm' but with all fields we don't need or use
   cut out */
struct my_tm {
  int tm_sec;  /* seconds */
  int tm_min;  /* minutes */
  int tm_hour; /* hours */
  int tm_mday; /* day of the month, 1-based */
  int tm_mon;  /* month, 0 = January */
  int tm_year; /* years since 1900 */
};

/* struct tm to time since epoch in GMT time zone.
 * This is similar to the standard mktime function but for GMT only, and
 * doesn't suffer from the various bugs and portability problems that
 * some systems' implementations have.
 *
 * A tm_mon value outside 0 - 11 is normalized by carrying whole years into
 * (or borrowing them from) the year, the way mktime does. The other fields
 * are used as given, without range checks.
 *
 * Returns the number of seconds since 1970-01-01 00:00:00 GMT, or -1 for
 * dates before 1970, which are not supported.
 */
static time_t my_timegm(struct my_tm *tm)
{
  static const int month_days_cumulative [12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int month, year, leap_days;

  if(tm->tm_year < 70)
    /* we don't support years before 1970 as they will cause this function
       to return a negative value */
    return -1;

  year = tm->tm_year + 1900;
  month = tm->tm_mon;
  if(month < 0) {
    /* a negative month lies in an earlier year: borrow whole years */
    year -= (11 - month) / 12;
    month = 11 - (11 - month) % 12;
  }
  else if(month >= 12) {
    /* a month past December lies in a later year: carry whole years */
    year += month / 12;
    month = month % 12;
  }

  if(year < 1970)
    /* borrowing years for a negative month moved us before the epoch */
    return -1;

  /* Leap days between 1970 and this date. In January and February the
     current year's possible leap day has not occurred yet, so only count
     up to the previous year. Uses the normalized month. */
  leap_days = year - (month <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));

  return ((((time_t) (year - 1970) * 365
            + leap_days + month_days_cumulative [month] + tm->tm_mday - 1) * 24
           + tm->tm_hour) * 60 + tm->tm_min) * 60 + tm->tm_sec;
}
00319 
00320 /*
00321  * parsedate()
00322  *
00323  * Returns:
00324  *
00325  * PARSEDATE_OK     - a fine conversion
00326  * PARSEDATE_FAIL   - failed to convert
00327  * PARSEDATE_LATER  - time overflow at the far end of time_t
00328  * PARSEDATE_SOONER - time underflow at the low end of time_t
00329  */
00330 
00331 static int parsedate(const char *date, time_t *output)
00332 {
00333   time_t t = 0;
00334   int wdaynum=-1;  /* day of the week number, 0-6 (mon-sun) */
00335   int monnum=-1;   /* month of the year number, 0-11 */
00336   int mdaynum=-1; /* day of month, 1 - 31 */
00337   int hournum=-1;
00338   int minnum=-1;
00339   int secnum=-1;
00340   int yearnum=-1;
00341   int tzoff=-1;
00342   struct my_tm tm;
00343   enum assume dignext = DATE_MDAY;
00344   const char *indate = date; /* save the original pointer */
00345   int part = 0; /* max 6 parts */
00346 
00347   while(*date && (part < 6)) {
00348     bool found=FALSE;
00349 
00350     skip(&date);
00351 
00352     if(ISALPHA(*date)) {
00353       /* a name coming up */
00354       char buf[32]="";
00355       size_t len;
00356       if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ"
00357                           "abcdefghijklmnopqrstuvwxyz]", buf))
00358         len = strlen(buf);
00359       else
00360         len = 0;
00361 
00362       if(wdaynum == -1) {
00363         wdaynum = checkday(buf, len);
00364         if(wdaynum != -1)
00365           found = TRUE;
00366       }
00367       if(!found && (monnum == -1)) {
00368         monnum = checkmonth(buf);
00369         if(monnum != -1)
00370           found = TRUE;
00371       }
00372 
00373       if(!found && (tzoff == -1)) {
00374         /* this just must be a time zone string */
00375         tzoff = checktz(buf);
00376         if(tzoff != -1)
00377           found = TRUE;
00378       }
00379 
00380       if(!found)
00381         return PARSEDATE_FAIL; /* bad string */
00382 
00383       date += len;
00384     }
00385     else if(ISDIGIT(*date)) {
00386       /* a digit */
00387       int val;
00388       char *end;
00389       int len=0;
00390       if((secnum == -1) &&
00391          (3 == sscanf(date, "%02d:%02d:%02d%n",
00392                       &hournum, &minnum, &secnum, &len))) {
00393         /* time stamp! */
00394         date += len;
00395       }
00396       else if((secnum == -1) &&
00397               (2 == sscanf(date, "%02d:%02d%n", &hournum, &minnum, &len))) {
00398         /* time stamp without seconds */
00399         date += len;
00400         secnum = 0;
00401       }
00402       else {
00403         long lval;
00404         int error;
00405         int old_errno;
00406 
00407         old_errno = ERRNO;
00408         SET_ERRNO(0);
00409         lval = strtol(date, &end, 10);
00410         error = ERRNO;
00411         if(error != old_errno)
00412           SET_ERRNO(old_errno);
00413 
00414         if(error)
00415           return PARSEDATE_FAIL;
00416 
00417 #if LONG_MAX != INT_MAX
00418         if((lval > (long)INT_MAX) || (lval < (long)INT_MIN))
00419           return PARSEDATE_FAIL;
00420 #endif
00421 
00422         val = curlx_sltosi(lval);
00423 
00424         if((tzoff == -1) &&
00425            ((end - date) == 4) &&
00426            (val <= 1400) &&
00427            (indate< date) &&
00428            ((date[-1] == '+' || date[-1] == '-'))) {
00429           /* four digits and a value less than or equal to 1400 (to take into
00430              account all sorts of funny time zone diffs) and it is preceded
00431              with a plus or minus. This is a time zone indication.  1400 is
00432              picked since +1300 is frequently used and +1400 is mentioned as
00433              an edge number in the document "ISO C 200X Proposal: Timezone
00434              Functions" at http://david.tribble.com/text/c0xtimezone.html If
00435              anyone has a more authoritative source for the exact maximum time
00436              zone offsets, please speak up! */
00437           found = TRUE;
00438           tzoff = (val/100 * 60 + val%100)*60;
00439 
00440           /* the + and - prefix indicates the local time compared to GMT,
00441              this we need ther reversed math to get what we want */
00442           tzoff = date[-1]=='+'?-tzoff:tzoff;
00443         }
00444 
00445         if(((end - date) == 8) &&
00446            (yearnum == -1) &&
00447            (monnum == -1) &&
00448            (mdaynum == -1)) {
00449           /* 8 digits, no year, month or day yet. This is YYYYMMDD */
00450           found = TRUE;
00451           yearnum = val/10000;
00452           monnum = (val%10000)/100-1; /* month is 0 - 11 */
00453           mdaynum = val%100;
00454         }
00455 
00456         if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) {
00457           if((val > 0) && (val<32)) {
00458             mdaynum = val;
00459             found = TRUE;
00460           }
00461           dignext = DATE_YEAR;
00462         }
00463 
00464         if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) {
00465           yearnum = val;
00466           found = TRUE;
00467           if(yearnum < 1900) {
00468             if(yearnum > 70)
00469               yearnum += 1900;
00470             else
00471               yearnum += 2000;
00472           }
00473           if(mdaynum == -1)
00474             dignext = DATE_MDAY;
00475         }
00476 
00477         if(!found)
00478           return PARSEDATE_FAIL;
00479 
00480         date = end;
00481       }
00482     }
00483 
00484     part++;
00485   }
00486 
00487   if(-1 == secnum)
00488     secnum = minnum = hournum = 0; /* no time, make it zero */
00489 
00490   if((-1 == mdaynum) ||
00491      (-1 == monnum) ||
00492      (-1 == yearnum))
00493     /* lacks vital info, fail */
00494     return PARSEDATE_FAIL;
00495 
00496 #if SIZEOF_TIME_T < 5
00497   /* 32 bit time_t can only hold dates to the beginning of 2038 */
00498   if(yearnum > 2037) {
00499     *output = 0x7fffffff;
00500     return PARSEDATE_LATER;
00501   }
00502 #endif
00503 
00504   if(yearnum < 1970) {
00505     *output = 0;
00506     return PARSEDATE_SOONER;
00507   }
00508 
00509   if((mdaynum > 31) || (monnum > 11) ||
00510      (hournum > 23) || (minnum > 59) || (secnum > 60))
00511     return PARSEDATE_FAIL; /* clearly an illegal date */
00512 
00513   tm.tm_sec = secnum;
00514   tm.tm_min = minnum;
00515   tm.tm_hour = hournum;
00516   tm.tm_mday = mdaynum;
00517   tm.tm_mon = monnum;
00518   tm.tm_year = yearnum - 1900;
00519 
00520   /* my_timegm() returns a time_t. time_t is often 32 bits, even on many
00521      architectures that feature 64 bit 'long'.
00522 
00523      Some systems have 64 bit time_t and deal with years beyond 2038. However,
00524      even on some of the systems with 64 bit time_t mktime() returns -1 for
00525      dates beyond 03:14:07 UTC, January 19, 2038. (Such as AIX 5100-06)
00526   */
00527   t = my_timegm(&tm);
00528 
00529   /* time zone adjust (cast t to int to compare to negative one) */
00530   if(-1 != (int)t) {
00531 
00532     /* Add the time zone diff between local time zone and GMT. */
00533     long delta = (long)(tzoff!=-1?tzoff:0);
00534 
00535     if((delta>0) && (t > LONG_MAX - delta)) {
00536       *output = 0x7fffffff;
00537       return PARSEDATE_LATER; /* time_t overflow */
00538     }
00539 
00540     t += delta;
00541   }
00542 
00543   *output = t;
00544 
00545   return PARSEDATE_OK;
00546 }
00547 
00548 time_t curl_getdate(const char *p, const time_t *now)
00549 {
00550   time_t parsed = -1;
00551   int rc = parsedate(p, &parsed);
00552   (void)now; /* legacy argument from the past that we ignore */
00553 
00554   switch(rc) {
00555   case PARSEDATE_OK:
00556   case PARSEDATE_LATER:
00557   case PARSEDATE_SOONER:
00558     return parsed;
00559   }
00560   /* everything else is fail */
00561   return -1;
00562 }
00563 
00564 /*
00565  * Curl_gmtime() is a gmtime() replacement for portability. Do not use the
00566  * gmtime_r() or gmtime() functions anywhere else but here.
00567  *
00568  */
00569 
00570 CURLcode Curl_gmtime(time_t intime, struct tm *store)
00571 {
00572   const struct tm *tm;
00573 #ifdef HAVE_GMTIME_R
00574   /* thread-safe version */
00575   tm = (struct tm *)gmtime_r(&intime, store);
00576 #else
00577   tm = gmtime(&intime);
00578   if(tm)
00579     *store = *tm; /* copy the pointed struct to the local copy */
00580 #endif
00581 
00582   if(!tm)
00583     return CURLE_BAD_FUNCTION_ARGUMENT;
00584   return CURLE_OK;
00585 }


rc_visard_driver
Author(s): Heiko Hirschmueller , Christian Emmerich , Felix Ruess
autogenerated on Thu Jun 6 2019 20:43:05