00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "tool_setup.h"
00023
00024 #define ENABLE_CURLX_PRINTF
00025
00026 #include "curlx.h"
00027 #include "tool_cfgable.h"
00028 #include "tool_doswin.h"
00029 #include "tool_urlglob.h"
00030 #include "tool_vms.h"
00031
00032 #include "memdebug.h"
00033
00034 #define GLOBERROR(string, column, code) \
00035 glob->error = string, glob->pos = column, code
00036
00037 void glob_cleanup(URLGlob* glob);
00038
00039 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
00040 {
00041 URLPattern *pat = &glob->pattern[glob->size];
00042 pat->type = UPTSet;
00043 pat->content.Set.size = 1;
00044 pat->content.Set.ptr_s = 0;
00045 pat->globindex = -1;
00046
00047 pat->content.Set.elements = malloc(sizeof(char *));
00048
00049 if(!pat->content.Set.elements)
00050 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
00051
00052 pat->content.Set.elements[0] = malloc(len+1);
00053 if(!pat->content.Set.elements[0])
00054 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
00055
00056 memcpy(pat->content.Set.elements[0], fixed, len);
00057 pat->content.Set.elements[0][len] = 0;
00058
00059 return CURLE_OK;
00060 }
00061
00062
00063
00064
00065
/*
 * multiply() scales *amount by 'with' and detects unsigned long overflow.
 *
 * Returns 0 on success with *amount updated. Returns 1 when the product does
 * not fit in an unsigned long, in which case *amount is left untouched.
 *
 * A zero multiplier is handled up front: the product is simply zero, and
 * checking it first avoids dividing by zero in the overflow test below.
 */
static int multiply(unsigned long *amount, long with)
{
  /* same conversion the arithmetic below would otherwise do implicitly */
  unsigned long factor = (unsigned long)with;
  unsigned long product;

  if(!factor) {
    *amount = 0;
    return 0;
  }

  product = *amount * factor;
  if(product / factor != *amount)
    return 1; /* the multiplication wrapped around */

  *amount = product;
  return 0;
}
00074
00075 static CURLcode glob_set(URLGlob *glob, char **patternp,
00076 size_t *posp, unsigned long *amount,
00077 int globindex)
00078 {
00079
00080
00081
00082 URLPattern *pat;
00083 bool done = FALSE;
00084 char *buf = glob->glob_buffer;
00085 char *pattern = *patternp;
00086 char *opattern = pattern;
00087 size_t opos = *posp-1;
00088
00089 pat = &glob->pattern[glob->size];
00090
00091 pat->type = UPTSet;
00092 pat->content.Set.size = 0;
00093 pat->content.Set.ptr_s = 0;
00094 pat->content.Set.elements = NULL;
00095 pat->globindex = globindex;
00096
00097 while(!done) {
00098 switch (*pattern) {
00099 case '\0':
00100 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
00101
00102 case '{':
00103 case '[':
00104 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
00105
00106 case '}':
00107 if(opattern == pattern)
00108 return GLOBERROR("empty string within braces", *posp,
00109 CURLE_URL_MALFORMAT);
00110
00111
00112 if(multiply(amount, pat->content.Set.size+1))
00113 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
00114
00115
00116 case ',':
00117
00118 *buf = '\0';
00119 if(pat->content.Set.elements) {
00120 char **new_arr = realloc(pat->content.Set.elements,
00121 (pat->content.Set.size + 1) * sizeof(char *));
00122 if(!new_arr)
00123 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
00124
00125 pat->content.Set.elements = new_arr;
00126 }
00127 else
00128 pat->content.Set.elements = malloc(sizeof(char *));
00129
00130 if(!pat->content.Set.elements)
00131 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
00132
00133 pat->content.Set.elements[pat->content.Set.size] =
00134 strdup(glob->glob_buffer);
00135 if(!pat->content.Set.elements[pat->content.Set.size])
00136 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
00137 ++pat->content.Set.size;
00138
00139 if(*pattern == '}') {
00140 pattern++;
00141 done = TRUE;
00142 continue;
00143 }
00144
00145 buf = glob->glob_buffer;
00146 ++pattern;
00147 ++(*posp);
00148 break;
00149
00150 case ']':
00151 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
00152
00153 case '\\':
00154 if(pattern[1]) {
00155 ++pattern;
00156 ++(*posp);
00157 }
00158
00159 default:
00160 *buf++ = *pattern++;
00161 ++(*posp);
00162 }
00163 }
00164
00165 *patternp = pattern;
00166 return CURLE_OK;
00167 }
00168
00169 static CURLcode glob_range(URLGlob *glob, char **patternp,
00170 size_t *posp, unsigned long *amount,
00171 int globindex)
00172 {
00173
00174
00175
00176
00177
00178
00179 URLPattern *pat;
00180 int rc;
00181 char *pattern = *patternp;
00182 char *c;
00183
00184 pat = &glob->pattern[glob->size];
00185 pat->globindex = globindex;
00186
00187 if(ISALPHA(*pattern)) {
00188
00189 char min_c;
00190 char max_c;
00191 char end_c;
00192 int step=1;
00193
00194 pat->type = UPTCharRange;
00195
00196 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
00197
00198 if(rc == 3) {
00199 if(end_c == ':') {
00200 char *endp;
00201 unsigned long lstep;
00202 errno = 0;
00203 lstep = strtoul(&pattern[4], &endp, 10);
00204 if(errno || &pattern[4] == endp || *endp != ']')
00205 step = -1;
00206 else {
00207 pattern = endp+1;
00208 step = (int)lstep;
00209 if(step > (max_c - min_c))
00210 step = -1;
00211 }
00212 }
00213 else if(end_c != ']')
00214
00215 rc = 0;
00216 else
00217
00218 pattern += 4;
00219 }
00220
00221 *posp += (pattern - *patternp);
00222
00223 if((rc != 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) ||
00224 (step <= 0) )
00225
00226 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
00227
00228
00229 pat->content.CharRange.step = step;
00230 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
00231 pat->content.CharRange.max_c = max_c;
00232
00233 if(multiply(amount, (pat->content.CharRange.max_c -
00234 pat->content.CharRange.min_c) /
00235 pat->content.CharRange.step + 1) )
00236 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
00237 }
00238 else if(ISDIGIT(*pattern)) {
00239
00240 unsigned long min_n;
00241 unsigned long max_n = 0;
00242 unsigned long step_n = 0;
00243 char *endp;
00244
00245 pat->type = UPTNumRange;
00246 pat->content.NumRange.padlength = 0;
00247
00248 if(*pattern == '0') {
00249
00250 c = pattern;
00251 while(ISDIGIT(*c)) {
00252 c++;
00253 ++pat->content.NumRange.padlength;
00254
00255 }
00256 }
00257
00258 errno = 0;
00259 min_n = strtoul(pattern, &endp, 10);
00260 if(errno || (endp == pattern))
00261 endp=NULL;
00262 else {
00263 if(*endp != '-')
00264 endp = NULL;
00265 else {
00266 pattern = endp+1;
00267 while(*pattern && ISBLANK(*pattern))
00268 pattern++;
00269 if(!ISDIGIT(*pattern)) {
00270 endp = NULL;
00271 goto fail;
00272 }
00273 errno = 0;
00274 max_n = strtoul(pattern, &endp, 10);
00275 if(errno || (*endp == ':')) {
00276 pattern = endp+1;
00277 errno = 0;
00278 step_n = strtoul(pattern, &endp, 10);
00279 if(errno)
00280
00281 endp = NULL;
00282 }
00283 else
00284 step_n = 1;
00285 if(endp && (*endp == ']')) {
00286 pattern= endp+1;
00287 }
00288 else
00289 endp = NULL;
00290 }
00291 }
00292
00293 fail:
00294 *posp += (pattern - *patternp);
00295
00296 if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)) || !step_n)
00297
00298 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
00299
00300
00301
00302 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
00303 pat->content.NumRange.max_n = max_n;
00304 pat->content.NumRange.step = step_n;
00305
00306 if(multiply(amount, (pat->content.NumRange.max_n -
00307 pat->content.NumRange.min_n) /
00308 pat->content.NumRange.step + 1) )
00309 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
00310 }
00311 else
00312 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
00313
00314 *patternp = pattern;
00315 return CURLE_OK;
00316 }
00317
00318 static bool peek_ipv6(const char *str, size_t *skip)
00319 {
00320
00321
00322
00323
00324
00325 size_t i = 0;
00326 size_t colons = 0;
00327 if(str[i++] != '[') {
00328 return FALSE;
00329 }
00330 for(;;) {
00331 const char c = str[i++];
00332 if(ISALNUM(c) || c == '.' || c == '%') {
00333
00334 }
00335 else if(c == ':') {
00336 colons++;
00337 }
00338 else if(c == ']') {
00339 *skip = i;
00340 return colons >= 2 ? TRUE : FALSE;
00341 }
00342 else {
00343 return FALSE;
00344 }
00345 }
00346 }
00347
00348 static CURLcode glob_parse(URLGlob *glob, char *pattern,
00349 size_t pos, unsigned long *amount)
00350 {
00351
00352
00353
00354 CURLcode res = CURLE_OK;
00355 int globindex = 0;
00356
00357 *amount = 1;
00358
00359 while(*pattern && !res) {
00360 char *buf = glob->glob_buffer;
00361 size_t sublen = 0;
00362 while(*pattern && *pattern != '{') {
00363 if(*pattern == '[') {
00364
00365 size_t skip;
00366 if(peek_ipv6(pattern, &skip)) {
00367 memcpy(buf, pattern, skip);
00368 buf += skip;
00369 pattern += skip;
00370 sublen += skip;
00371 continue;
00372 }
00373 break;
00374 }
00375 if(*pattern == '}' || *pattern == ']')
00376 return GLOBERROR("unmatched close brace/bracket", pos,
00377 CURLE_URL_MALFORMAT);
00378
00379
00380 if(*pattern == '\\' &&
00381 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
00382 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
00383
00384
00385 ++pattern;
00386 ++pos;
00387 }
00388 *buf++ = *pattern++;
00389 ++pos;
00390 sublen++;
00391 }
00392 if(sublen) {
00393
00394 *buf = '\0';
00395 res = glob_fixed(glob, glob->glob_buffer, sublen);
00396 }
00397 else {
00398 switch (*pattern) {
00399 case '\0':
00400 break;
00401
00402 case '{':
00403
00404 pattern++;
00405 pos++;
00406 res = glob_set(glob, &pattern, &pos, amount, globindex++);
00407 break;
00408
00409 case '[':
00410
00411 pattern++;
00412 pos++;
00413 res = glob_range(glob, &pattern, &pos, amount, globindex++);
00414 break;
00415 }
00416 }
00417
00418 if(++glob->size >= GLOB_PATTERN_NUM)
00419 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
00420 }
00421 return res;
00422 }
00423
00424 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
00425 FILE *error)
00426 {
00427
00428
00429
00430
00431 URLGlob *glob_expand;
00432 unsigned long amount = 0;
00433 char *glob_buffer;
00434 CURLcode res;
00435
00436 *glob = NULL;
00437
00438 glob_buffer = malloc(strlen(url) + 1);
00439 if(!glob_buffer)
00440 return CURLE_OUT_OF_MEMORY;
00441 glob_buffer[0]=0;
00442
00443 glob_expand = calloc(1, sizeof(URLGlob));
00444 if(!glob_expand) {
00445 Curl_safefree(glob_buffer);
00446 return CURLE_OUT_OF_MEMORY;
00447 }
00448 glob_expand->urllen = strlen(url);
00449 glob_expand->glob_buffer = glob_buffer;
00450
00451 res = glob_parse(glob_expand, url, 1, &amount);
00452 if(!res)
00453 *urlnum = amount;
00454 else {
00455 if(error && glob_expand->error) {
00456 char text[128];
00457 const char *t;
00458 if(glob_expand->pos) {
00459 snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error,
00460 glob_expand->pos);
00461 t = text;
00462 }
00463 else
00464 t = glob_expand->error;
00465
00466
00467 fprintf(error, "curl: (%d) [globbing] %s\n", res, t);
00468 }
00469
00470 glob_cleanup(glob_expand);
00471 *urlnum = 1;
00472 return res;
00473 }
00474
00475 *glob = glob_expand;
00476 return CURLE_OK;
00477 }
00478
00479 void glob_cleanup(URLGlob* glob)
00480 {
00481 size_t i;
00482 int elem;
00483
00484 for(i = 0; i < glob->size; i++) {
00485 if((glob->pattern[i].type == UPTSet) &&
00486 (glob->pattern[i].content.Set.elements)) {
00487 for(elem = glob->pattern[i].content.Set.size - 1;
00488 elem >= 0;
00489 --elem) {
00490 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
00491 }
00492 Curl_safefree(glob->pattern[i].content.Set.elements);
00493 }
00494 }
00495 Curl_safefree(glob->glob_buffer);
00496 Curl_safefree(glob);
00497 }
00498
00499 CURLcode glob_next_url(char **globbed, URLGlob *glob)
00500 {
00501 URLPattern *pat;
00502 size_t i;
00503 size_t len;
00504 size_t buflen = glob->urllen + 1;
00505 char *buf = glob->glob_buffer;
00506
00507 *globbed = NULL;
00508
00509 if(!glob->beenhere)
00510 glob->beenhere = 1;
00511 else {
00512 bool carry = TRUE;
00513
00514
00515
00516 for(i = 0; carry && (i < glob->size); i++) {
00517 carry = FALSE;
00518 pat = &glob->pattern[glob->size - 1 - i];
00519 switch(pat->type) {
00520 case UPTSet:
00521 if((pat->content.Set.elements) &&
00522 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
00523 pat->content.Set.ptr_s = 0;
00524 carry = TRUE;
00525 }
00526 break;
00527 case UPTCharRange:
00528 pat->content.CharRange.ptr_c =
00529 (char)(pat->content.CharRange.step +
00530 (int)((unsigned char)pat->content.CharRange.ptr_c));
00531 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
00532 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
00533 carry = TRUE;
00534 }
00535 break;
00536 case UPTNumRange:
00537 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
00538 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
00539 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
00540 carry = TRUE;
00541 }
00542 break;
00543 default:
00544 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
00545 return CURLE_FAILED_INIT;
00546 }
00547 }
00548 if(carry) {
00549
00550 return CURLE_OK;
00551 }
00552 }
00553
00554 for(i = 0; i < glob->size; ++i) {
00555 pat = &glob->pattern[i];
00556 switch(pat->type) {
00557 case UPTSet:
00558 if(pat->content.Set.elements) {
00559 snprintf(buf, buflen, "%s",
00560 pat->content.Set.elements[pat->content.Set.ptr_s]);
00561 len = strlen(buf);
00562 buf += len;
00563 buflen -= len;
00564 }
00565 break;
00566 case UPTCharRange:
00567 if(buflen) {
00568 *buf++ = pat->content.CharRange.ptr_c;
00569 *buf = '\0';
00570 buflen--;
00571 }
00572 break;
00573 case UPTNumRange:
00574 snprintf(buf, buflen, "%0*ld",
00575 pat->content.NumRange.padlength,
00576 pat->content.NumRange.ptr_n);
00577 len = strlen(buf);
00578 buf += len;
00579 buflen -= len;
00580 break;
00581 default:
00582 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
00583 return CURLE_FAILED_INIT;
00584 }
00585 }
00586
00587 *globbed = strdup(glob->glob_buffer);
00588 if(!*globbed)
00589 return CURLE_OUT_OF_MEMORY;
00590
00591 return CURLE_OK;
00592 }
00593
00594 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
00595 {
00596 char *target;
00597 size_t allocsize;
00598 char numbuf[18];
00599 char *appendthis = NULL;
00600 size_t appendlen = 0;
00601 size_t stringlen = 0;
00602
00603 *result = NULL;
00604
00605
00606
00607
00608
00609 allocsize = strlen(filename) + 1;
00610
00611 target = malloc(allocsize);
00612 if(!target)
00613 return CURLE_OUT_OF_MEMORY;
00614
00615 while(*filename) {
00616 if(*filename == '#' && ISDIGIT(filename[1])) {
00617 unsigned long i;
00618 char *ptr = filename;
00619 unsigned long num = strtoul(&filename[1], &filename, 10);
00620 URLPattern *pat =NULL;
00621
00622 if(num < glob->size) {
00623 num--;
00624
00625 for(i=0; i<glob->size; i++) {
00626 if(glob->pattern[i].globindex == (int)num) {
00627 pat = &glob->pattern[i];
00628 break;
00629 }
00630 }
00631 }
00632
00633 if(pat) {
00634 switch(pat->type) {
00635 case UPTSet:
00636 if(pat->content.Set.elements) {
00637 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
00638 appendlen =
00639 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
00640 }
00641 break;
00642 case UPTCharRange:
00643 numbuf[0] = pat->content.CharRange.ptr_c;
00644 numbuf[1] = 0;
00645 appendthis = numbuf;
00646 appendlen = 1;
00647 break;
00648 case UPTNumRange:
00649 snprintf(numbuf, sizeof(numbuf), "%0*d",
00650 pat->content.NumRange.padlength,
00651 pat->content.NumRange.ptr_n);
00652 appendthis = numbuf;
00653 appendlen = strlen(numbuf);
00654 break;
00655 default:
00656 fprintf(stderr, "internal error: invalid pattern type (%d)\n",
00657 (int)pat->type);
00658 Curl_safefree(target);
00659 return CURLE_FAILED_INIT;
00660 }
00661 }
00662 else {
00663
00664 filename = ptr;
00665 appendthis = filename++;
00666 appendlen = 1;
00667 }
00668 }
00669 else {
00670 appendthis = filename++;
00671 appendlen = 1;
00672 }
00673 if(appendlen + stringlen >= allocsize) {
00674 char *newstr;
00675
00676
00677 allocsize = (appendlen + stringlen) * 2;
00678 newstr = realloc(target, allocsize + 1);
00679 if(!newstr) {
00680 Curl_safefree(target);
00681 return CURLE_OUT_OF_MEMORY;
00682 }
00683 target = newstr;
00684 }
00685 memcpy(&target[stringlen], appendthis, appendlen);
00686 stringlen += appendlen;
00687 }
00688 target[stringlen]= '\0';
00689
00690 #if defined(MSDOS) || defined(WIN32)
00691 {
00692 char *sanitized;
00693 SANITIZEcode sc = sanitize_file_name(&sanitized, target,
00694 (SANITIZE_ALLOW_PATH |
00695 SANITIZE_ALLOW_RESERVED));
00696 Curl_safefree(target);
00697 if(sc)
00698 return CURLE_URL_MALFORMAT;
00699 target = sanitized;
00700 }
00701 #endif
00702
00703 *result = target;
00704 return CURLE_OK;
00705 }