http_chunks.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *                                  _   _ ____  _
00003  *  Project                     ___| | | |  _ \| |
00004  *                             / __| | | | |_) | |
00005  *                            | (__| |_| |  _ <| |___
00006  *                             \___|\___/|_| \_\_____|
00007  *
00008  * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel@haxx.se>, et al.
00009  *
00010  * This software is licensed as described in the file COPYING, which
00011  * you should have received as part of this distribution. The terms
00012  * are also available at https://curl.haxx.se/docs/copyright.html.
00013  *
00014  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
00015  * copies of the Software, and permit persons to whom the Software is
00016  * furnished to do so, under the terms of the COPYING file.
00017  *
00018  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
00019  * KIND, either express or implied.
00020  *
00021  ***************************************************************************/
00022 
00023 #include "curl_setup.h"
00024 
00025 #ifndef CURL_DISABLE_HTTP
00026 
00027 #include "urldata.h" /* it includes http_chunks.h */
00028 #include "sendf.h"   /* for the client write stuff */
00029 
00030 #include "content_encoding.h"
00031 #include "http.h"
00032 #include "non-ascii.h" /* for Curl_convert_to_network prototype */
00033 #include "strtoofft.h"
00034 #include "warnless.h"
00035 
00036 /* The last #include files should be: */
00037 #include "curl_memory.h"
00038 #include "memdebug.h"
00039 
00040 /*
00041  * Chunk format (simplified):
00042  *
00043  * <HEX SIZE>[ chunk extension ] CRLF
00044  * <DATA> CRLF
00045  *
00046  * Highlights from RFC2616 section 3.6 say:
00047 
00048    The chunked encoding modifies the body of a message in order to
00049    transfer it as a series of chunks, each with its own size indicator,
00050    followed by an OPTIONAL trailer containing entity-header fields. This
00051    allows dynamically produced content to be transferred along with the
00052    information necessary for the recipient to verify that it has
00053    received the full message.
00054 
00055        Chunked-Body   = *chunk
00056                         last-chunk
00057                         trailer
00058                         CRLF
00059 
00060        chunk          = chunk-size [ chunk-extension ] CRLF
00061                         chunk-data CRLF
00062        chunk-size     = 1*HEX
00063        last-chunk     = 1*("0") [ chunk-extension ] CRLF
00064 
00065        chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
00066        chunk-ext-name = token
00067        chunk-ext-val  = token | quoted-string
00068        chunk-data     = chunk-size(OCTET)
00069        trailer        = *(entity-header CRLF)
00070 
00071    The chunk-size field is a string of hex digits indicating the size of
00072    the chunk. The chunked encoding is ended by any chunk whose size is
00073    zero, followed by the trailer, which is terminated by an empty line.
00074 
00075  */
00076 
/* Tell whether 'digit' is an ASCII hexadecimal digit (0-9, A-F or a-f).
   The ranges are spelled out as ASCII code points rather than delegating to
   isxdigit, to accommodate non-ASCII hosts. */
static bool Curl_isxdigit(char digit)
{
  if(digit >= 0x30 && digit <= 0x39)
    return TRUE;  /* 0-9 */
  if(digit >= 0x41 && digit <= 0x46)
    return TRUE;  /* A-F */
  if(digit >= 0x61 && digit <= 0x66)
    return TRUE;  /* a-f */
  return FALSE;
}
00085 
00086 void Curl_httpchunk_init(struct connectdata *conn)
00087 {
00088   struct Curl_chunker *chunk = &conn->chunk;
00089   chunk->hexindex=0;        /* start at 0 */
00090   chunk->dataleft=0;        /* no data left yet! */
00091   chunk->state = CHUNK_HEX; /* we get hex first! */
00092 }
00093 
00094 /*
00095  * chunk_read() returns a OK for normal operations, or a positive return code
00096  * for errors. STOP means this sequence of chunks is complete.  The 'wrote'
00097  * argument is set to tell the caller how many bytes we actually passed to the
00098  * client (for byte-counting and whatever).
00099  *
00100  * The states and the state-machine is further explained in the header file.
00101  *
00102  * This function always uses ASCII hex values to accommodate non-ASCII hosts.
00103  * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
00104  */
00105 CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
00106                               char *datap,
00107                               ssize_t datalen,
00108                               ssize_t *wrotep)
00109 {
00110   CURLcode result=CURLE_OK;
00111   struct Curl_easy *data = conn->data;
00112   struct Curl_chunker *ch = &conn->chunk;
00113   struct SingleRequest *k = &data->req;
00114   size_t piece;
00115   curl_off_t length = (curl_off_t)datalen;
00116   size_t *wrote = (size_t *)wrotep;
00117 
00118   *wrote = 0; /* nothing's written yet */
00119 
00120   /* the original data is written to the client, but we go on with the
00121      chunk read process, to properly calculate the content length*/
00122   if(data->set.http_te_skip && !k->ignorebody) {
00123     result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
00124     if(result)
00125       return CHUNKE_WRITE_ERROR;
00126   }
00127 
00128   while(length) {
00129     switch(ch->state) {
00130     case CHUNK_HEX:
00131       if(Curl_isxdigit(*datap)) {
00132         if(ch->hexindex < MAXNUM_SIZE) {
00133           ch->hexbuffer[ch->hexindex] = *datap;
00134           datap++;
00135           length--;
00136           ch->hexindex++;
00137         }
00138         else {
00139           return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
00140         }
00141       }
00142       else {
00143         char *endptr;
00144         if(0 == ch->hexindex)
00145           /* This is illegal data, we received junk where we expected
00146              a hexadecimal digit. */
00147           return CHUNKE_ILLEGAL_HEX;
00148 
00149         /* length and datap are unmodified */
00150         ch->hexbuffer[ch->hexindex]=0;
00151 
00152         /* convert to host encoding before calling strtoul */
00153         result = Curl_convert_from_network(conn->data, ch->hexbuffer,
00154                                            ch->hexindex);
00155         if(result) {
00156           /* Curl_convert_from_network calls failf if unsuccessful */
00157           /* Treat it as a bad hex character */
00158           return CHUNKE_ILLEGAL_HEX;
00159         }
00160 
00161         ch->datasize=curlx_strtoofft(ch->hexbuffer, &endptr, 16);
00162         if((ch->datasize == CURL_OFF_T_MAX) && (errno == ERANGE))
00163           /* overflow is an error */
00164           return CHUNKE_ILLEGAL_HEX;
00165         ch->state = CHUNK_LF; /* now wait for the CRLF */
00166       }
00167       break;
00168 
00169     case CHUNK_LF:
00170       /* waiting for the LF after a chunk size */
00171       if(*datap == 0x0a) {
00172         /* we're now expecting data to come, unless size was zero! */
00173         if(0 == ch->datasize) {
00174           ch->state = CHUNK_TRAILER; /* now check for trailers */
00175           conn->trlPos=0;
00176         }
00177         else
00178           ch->state = CHUNK_DATA;
00179       }
00180 
00181       datap++;
00182       length--;
00183       break;
00184 
00185     case CHUNK_DATA:
00186       /* We expect 'datasize' of data. We have 'length' right now, it can be
00187          more or less than 'datasize'. Get the smallest piece.
00188       */
00189       piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
00190 
00191       /* Write the data portion available */
00192 #ifdef HAVE_LIBZ
00193       switch(conn->data->set.http_ce_skip?
00194              IDENTITY : data->req.auto_decoding) {
00195       case IDENTITY:
00196 #endif
00197         if(!k->ignorebody) {
00198           if(!data->set.http_te_skip)
00199             result = Curl_client_write(conn, CLIENTWRITE_BODY, datap,
00200                                        piece);
00201           else
00202             result = CURLE_OK;
00203         }
00204 #ifdef HAVE_LIBZ
00205         break;
00206 
00207       case DEFLATE:
00208         /* update data->req.keep.str to point to the chunk data. */
00209         data->req.str = datap;
00210         result = Curl_unencode_deflate_write(conn, &data->req,
00211                                              (ssize_t)piece);
00212         break;
00213 
00214       case GZIP:
00215         /* update data->req.keep.str to point to the chunk data. */
00216         data->req.str = datap;
00217         result = Curl_unencode_gzip_write(conn, &data->req,
00218                                           (ssize_t)piece);
00219         break;
00220 
00221       default:
00222         failf(conn->data,
00223               "Unrecognized content encoding type. "
00224               "libcurl understands `identity', `deflate' and `gzip' "
00225               "content encodings.");
00226         return CHUNKE_BAD_ENCODING;
00227       }
00228 #endif
00229 
00230       if(result)
00231         return CHUNKE_WRITE_ERROR;
00232 
00233       *wrote += piece;
00234 
00235       ch->datasize -= piece; /* decrease amount left to expect */
00236       datap += piece;    /* move read pointer forward */
00237       length -= piece;   /* decrease space left in this round */
00238 
00239       if(0 == ch->datasize)
00240         /* end of data this round, we now expect a trailing CRLF */
00241         ch->state = CHUNK_POSTLF;
00242       break;
00243 
00244     case CHUNK_POSTLF:
00245       if(*datap == 0x0a) {
00246         /* The last one before we go back to hex state and start all over. */
00247         Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
00248       }
00249       else if(*datap != 0x0d)
00250         return CHUNKE_BAD_CHUNK;
00251       datap++;
00252       length--;
00253       break;
00254 
00255     case CHUNK_TRAILER:
00256       if((*datap == 0x0d) || (*datap == 0x0a)) {
00257         /* this is the end of a trailer, but if the trailer was zero bytes
00258            there was no trailer and we move on */
00259 
00260         if(conn->trlPos) {
00261           /* we allocate trailer with 3 bytes extra room to fit this */
00262           conn->trailer[conn->trlPos++]=0x0d;
00263           conn->trailer[conn->trlPos++]=0x0a;
00264           conn->trailer[conn->trlPos]=0;
00265 
00266           /* Convert to host encoding before calling Curl_client_write */
00267           result = Curl_convert_from_network(conn->data, conn->trailer,
00268                                              conn->trlPos);
00269           if(result)
00270             /* Curl_convert_from_network calls failf if unsuccessful */
00271             /* Treat it as a bad chunk */
00272             return CHUNKE_BAD_CHUNK;
00273 
00274           if(!data->set.http_te_skip) {
00275             result = Curl_client_write(conn, CLIENTWRITE_HEADER,
00276                                        conn->trailer, conn->trlPos);
00277             if(result)
00278               return CHUNKE_WRITE_ERROR;
00279           }
00280           conn->trlPos=0;
00281           ch->state = CHUNK_TRAILER_CR;
00282           if(*datap == 0x0a)
00283             /* already on the LF */
00284             break;
00285         }
00286         else {
00287           /* no trailer, we're on the final CRLF pair */
00288           ch->state = CHUNK_TRAILER_POSTCR;
00289           break; /* don't advance the pointer */
00290         }
00291       }
00292       else {
00293         /* conn->trailer is assumed to be freed in url.c on a
00294            connection basis */
00295         if(conn->trlPos >= conn->trlMax) {
00296           /* we always allocate three extra bytes, just because when the full
00297              header has been received we append CRLF\0 */
00298           char *ptr;
00299           if(conn->trlMax) {
00300             conn->trlMax *= 2;
00301             ptr = realloc(conn->trailer, conn->trlMax + 3);
00302           }
00303           else {
00304             conn->trlMax=128;
00305             ptr = malloc(conn->trlMax + 3);
00306           }
00307           if(!ptr)
00308             return CHUNKE_OUT_OF_MEMORY;
00309           conn->trailer = ptr;
00310         }
00311         conn->trailer[conn->trlPos++]=*datap;
00312       }
00313       datap++;
00314       length--;
00315       break;
00316 
00317     case CHUNK_TRAILER_CR:
00318       if(*datap == 0x0a) {
00319         ch->state = CHUNK_TRAILER_POSTCR;
00320         datap++;
00321         length--;
00322       }
00323       else
00324         return CHUNKE_BAD_CHUNK;
00325       break;
00326 
00327     case CHUNK_TRAILER_POSTCR:
00328       /* We enter this state when a CR should arrive so we expect to
00329          have to first pass a CR before we wait for LF */
00330       if((*datap != 0x0d) && (*datap != 0x0a)) {
00331         /* not a CR then it must be another header in the trailer */
00332         ch->state = CHUNK_TRAILER;
00333         break;
00334       }
00335       if(*datap == 0x0d) {
00336         /* skip if CR */
00337         datap++;
00338         length--;
00339       }
00340       /* now wait for the final LF */
00341       ch->state = CHUNK_STOP;
00342       break;
00343 
00344     case CHUNK_STOP:
00345       if(*datap == 0x0a) {
00346         length--;
00347 
00348         /* Record the length of any data left in the end of the buffer
00349            even if there's no more chunks to read */
00350         ch->dataleft = curlx_sotouz(length);
00351 
00352         return CHUNKE_STOP; /* return stop */
00353       }
00354       else
00355         return CHUNKE_BAD_CHUNK;
00356     }
00357   }
00358   return CHUNKE_OK;
00359 }
00360 
00361 const char *Curl_chunked_strerror(CHUNKcode code)
00362 {
00363   switch(code) {
00364   default:
00365     return "OK";
00366   case CHUNKE_TOO_LONG_HEX:
00367     return "Too long hexadecimal number";
00368   case CHUNKE_ILLEGAL_HEX:
00369     return "Illegal or missing hexadecimal sequence";
00370   case CHUNKE_BAD_CHUNK:
00371     return "Malformed encoding found";
00372   case CHUNKE_WRITE_ERROR:
00373     return "Write error";
00374   case CHUNKE_BAD_ENCODING:
00375     return "Bad content-encoding found";
00376   case CHUNKE_OUT_OF_MEMORY:
00377     return "Out of memory";
00378   }
00379 }
00380 
00381 #endif /* CURL_DISABLE_HTTP */


rc_visard_driver
Author(s): Heiko Hirschmueller , Christian Emmerich , Felix Ruess
autogenerated on Thu Jun 6 2019 20:43:04