wpa_supplicant: upnp_xml.c Source File

Go to the documentation of this file.
00001 /*
00002  * UPnP XML helper routines
00003  * Copyright (c) 2000-2003 Intel Corporation
00004  * Copyright (c) 2006-2007 Sony Corporation
00005  * Copyright (c) 2008-2009 Atheros Communications
00006  * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
00007  *
00008  * See wps_upnp.c for more details on licensing and code history.
00009  */
00010 
00011 #include "includes.h"
00012 
00013 #include "common.h"
00014 #include "base64.h"
00015 #include "http.h"
00016 #include "upnp_xml.h"
00017 
00018 
00019 /*
00020  * XML parsing and formatting
00021  *
00022  * XML is a markup language based on unicode; usually (and in our case,
00023  * always!) based on utf-8. utf-8 uses a variable number of bytes per
00024  * character. utf-8 has the advantage that all non-ASCII unicode characters are
00025  * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
00026  * characters are single ascii bytes, thus we can use typical text processing.
00027  *
00028  * (One other interesting thing about utf-8 is that it is possible to look at
00029  * any random byte and determine if it is the first byte of a character as
00030  * versus a continuation byte).
00031  *
00032  * The base syntax of XML uses a few ASCII punctuation characters; any such
00033  * characters that would appear in the payload data are rewritten using escape
00034  * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
00035  * Five such escapes total (more can be defined but that does not apply to our
00036  * case). Thus we can safely parse for angle brackets etc.
00037  *
00038  * XML describes tree structures of tagged data, with each element beginning
00039  * with an opening tag <label> and ending with a closing tag </label> with
00040  * matching label. (There is also a self-closing tag <label/> which is supposed
00041  * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
00042  * to see it for our purpose).
00043  *
00044  * Actually the opening tags are a little more complicated because they can
00045  * contain "attributes" after the label (delimited by ascii space or tab chars)
00046  * of the form attribute_label="value" or attribute_label='value'; as it turns
00047  * out we do not have to read any of these attributes, just ignore them.
00048  *
00049  * Labels are any sequence of chars other than space, tab, right angle bracket
00050  * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
00051  * As it turns out, we can ignore the namespaces, in fact we can ignore the
00052  * entire tree hierarchy, because the plain labels we are looking for will be
00053  * unique (not in general, but for this application). We do however have to be
00054  * careful to skip over the namespaces.
00055  *
00056  * In generating XML we have to be more careful, but that is easy because
00057  * everything we do is pretty canned. The only real care to take is to escape
00058  * any special chars in our payload.
00059  */
00060 
/**
 * xml_next_tag - Locate the next tag in an XML text
 * @in: NUL-terminated input text to scan
 * @out: OUT: first character after the opening '<' (a '/' for a closing tag)
 * @out_tagname: OUT: start of the tag name, with any "namespace:" prefix
 *	skipped
 * @end: OUT: first character after the closing '>' of the tag
 * Returns: 0 if a complete tag was found, 1 otherwise
 *
 * A tag is everything from a '<' up to the following '>'. Inside the angle
 * brackets there may be a leading '/' (closing tag), then an optional
 * namespace label made of alphanumerics and '-' followed by ':', and then the
 * tag name itself. Attributes after the name are skipped, not parsed.
 *
 * On failure *end is left untouched; *out and *out_tagname may already have
 * been written if a '<' was seen without a matching '>', so callers must only
 * use the outputs when 0 is returned.
 */
static int xml_next_tag(const char *in, const char **out,
                        const char **out_tagname, const char **end)
{
        /* Skip payload text up to the next tag opener */
        while (*in && *in != '<')
                in++;
        if (*in != '<')
                return 1;
        *out = ++in;
        if (*in == '/')
                in++;
        *out_tagname = in; /* provisional; replaced below if a prefix exists */
        /*
         * The input is UTF-8, so bytes may have the high bit set. Passing a
         * negative char value to a <ctype.h> classifier is undefined
         * behavior; convert to unsigned char first.
         */
        while (isalnum((unsigned char) *in) || *in == '-')
                in++;
        if (*in == ':')
                *out_tagname = ++in; /* what we skipped was a namespace */
        /* Ignore the rest of the tag (name remainder and attributes) */
        while (*in && *in != '>')
                in++;
        if (*in != '>')
                return 1;
        *end = ++in;
        return 0;
}
00100 
00101 
00102 /* xml_data_encode -- format data for xml file, escaping special characters.
00103  *
00104  * Note that we assume we are using utf8 both as input and as output!
00105  * In utf8, characters may be classed as follows:
00106  *     0xxxxxxx(2) -- 1 byte ascii char
00107  *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
00108  *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
00109  *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
00110  *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
00111  *      10xxxxxx(2) -- extension byte (6 payload bits per byte)
00112  *      Some values implied by the above are however illegal because they
00113  *      do not represent unicode chars or are not the shortest encoding.
00114  * Actually, we can almost entirely ignore the above and just do
00115  * text processing same as for ascii text.
00116  *
00117  * XML is written with arbitrary unicode characters, except that five
00118  * characters have special meaning and so must be escaped where they
00119  * appear in payload data... which we do here.
00120  */
00121 void xml_data_encode(struct wpabuf *buf, const char *data, int len)
00122 {
00123         int i;
00124         for (i = 0; i < len; i++) {
00125                 u8 c = ((u8 *) data)[i];
00126                 if (c == '<') {
00127                         wpabuf_put_str(buf, "&lt;");
00128                         continue;
00129                 }
00130                 if (c == '>') {
00131                         wpabuf_put_str(buf, "&gt;");
00132                         continue;
00133                 }
00134                 if (c == '&') {
00135                         wpabuf_put_str(buf, "&amp;");
00136                         continue;
00137                 }
00138                 if (c == '\'') {
00139                         wpabuf_put_str(buf, "&apos;");
00140                         continue;
00141                 }
00142                 if (c == '"') {
00143                         wpabuf_put_str(buf, "&quot;");
00144                         continue;
00145                 }
00146                 /*
00147                  * We could try to represent control characters using the
00148                  * sequence: &#x; where x is replaced by a hex numeral, but not
00149                  * clear why we would do this.
00150                  */
00151                 wpabuf_put_u8(buf, c);
00152         }
00153 }
00154 
00155 
/* xml_add_tagged_data -- append one "<tag>data</tag>" line to an XML buffer.
 *
 * buf:  buffer the element is appended to
 * tag:  element name; written verbatim, so it must not contain any characters
 *       that would need escaping
 * data: NUL-terminated payload; special characters are escaped through
 *       xml_data_encode()
 *
 * The closing tag is followed by a newline.
 */
void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
{
        size_t payload_len = os_strlen(data);

        wpabuf_printf(buf, "<%s>", tag);
        xml_data_encode(buf, data, payload_len);
        wpabuf_printf(buf, "</%s>\n", tag);
}
00167 
00168 
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the text following the first matching opening tag
 * @doc: NUL-terminated XML text to search
 * @item: Tag name to look for, without namespace prefix (compared without
 *	regard to case)
 * Returns: Newly allocated, NUL-terminated copy of the text between the end
 * of the first matching opening tag and the next '<' (or the end of @doc), or
 * %NULL if no matching tag was found or the allocation failed. The caller is
 * responsible for releasing the returned buffer with os_free().
 *
 * The returned text is copied as-is; XML escape sequences in it are not
 * decoded.
 */
char * xml_get_first_item(const char *doc, const char *item)
{
        const char *match = item;
        size_t match_len = os_strlen(item);
        const char *tag, *tagname, *end;
        char *value;

        /*
         * This is crude: ignore any possible tag name conflicts and go right
         * to the first tag of this name. This should be ok for the limited
         * domain of UPnP messages.
         */
        for (;;) {
                if (xml_next_tag(doc, &tag, &tagname, &end))
                        return NULL;
                doc = end;
                /*
                 * Accept only an opening tag whose name is exactly the
                 * requested one, i.e., the name is followed by '>' or by a
                 * non-printing character such as the whitespace that starts
                 * the attributes. The unsigned char conversion keeps the
                 * isgraph() call well defined for UTF-8 bytes with the high
                 * bit set.
                 */
                if (!os_strncasecmp(tagname, match, match_len) &&
                    *tag != '/' &&
                    (tagname[match_len] == '>' ||
                     !isgraph((unsigned char) tagname[match_len]))) {
                        break;
                }
        }

        /* The value runs from the end of the tag up to the next tag start */
        end = doc;
        while (*end && *end != '<')
                end++;
        /* Zeroed allocation with one extra byte provides the terminator */
        value = os_zalloc(1 + (end - doc));
        if (value == NULL)
                return NULL;
        os_memcpy(value, doc, end - doc);
        return value;
}
00222 
00223 
00224 struct wpabuf * xml_get_base64_item(const char *data, const char *name,
00225                                     enum http_reply_code *ret)
00226 {
00227         char *msg;
00228         struct wpabuf *buf;
00229         unsigned char *decoded;
00230         size_t len;
00231 
00232         msg = xml_get_first_item(data, name);
00233         if (msg == NULL) {
00234                 *ret = UPNP_ARG_VALUE_INVALID;
00235                 return NULL;
00236         }
00237 
00238         decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
00239         os_free(msg);
00240         if (decoded == NULL) {
00241                 *ret = UPNP_OUT_OF_MEMORY;
00242                 return NULL;
00243         }
00244 
00245         buf = wpabuf_alloc_ext_data(decoded, len);
00246         if (buf == NULL) {
00247                 os_free(decoded);
00248                 *ret = UPNP_OUT_OF_MEMORY;
00249                 return NULL;
00250         }
00251         return buf;
00252 }