00001 /* 00002 * UPnP XML helper routines 00003 * Copyright (c) 2000-2003 Intel Corporation 00004 * Copyright (c) 2006-2007 Sony Corporation 00005 * Copyright (c) 2008-2009 Atheros Communications 00006 * Copyright (c) 2009, Jouni Malinen <j@w1.fi> 00007 * 00008 * See wps_upnp.c for more details on licensing and code history. 00009 */ 00010 00011 #include "includes.h" 00012 00013 #include "common.h" 00014 #include "base64.h" 00015 #include "http.h" 00016 #include "upnp_xml.h" 00017 00018 00019 /* 00020 * XML parsing and formatting 00021 * 00022 * XML is a markup language based on unicode; usually (and in our case, 00023 * always!) based on utf-8. utf-8 uses a variable number of bytes per 00024 * character. utf-8 has the advantage that all non-ASCII unicode characters are 00025 * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII 00026 * characters are single ascii bytes, thus we can use typical text processing. 00027 * 00028 * (One other interesting thing about utf-8 is that it is possible to look at 00029 * any random byte and determine if it is the first byte of a character as 00030 * versus a continuation byte). 00031 * 00032 * The base syntax of XML uses a few ASCII punctionation characters; any 00033 * characters that would appear in the payload data are rewritten using 00034 * sequences, e.g., & for ampersand(&) and < for left angle bracket (<). 00035 * Five such escapes total (more can be defined but that does not apply to our 00036 * case). Thus we can safely parse for angle brackets etc. 00037 * 00038 * XML describes tree structures of tagged data, with each element beginning 00039 * with an opening tag <label> and ending with a closing tag </label> with 00040 * matching label. (There is also a self-closing tag <label/> which is supposed 00041 * to be equivalent to <label></label>, i.e., no payload, but we are unlikely 00042 * to see it for our purpose). 00043 * 00044 * Actually the opening tags are a little more complicated because they can 00045 * contain "attributes" after the label (delimited by ascii space or tab chars) 00046 * of the form attribute_label="value" or attribute_label='value'; as it turns 00047 * out we do not have to read any of these attributes, just ignore them. 00048 * 00049 * Labels are any sequence of chars other than space, tab, right angle bracket 00050 * (and ?), but may have an inner structure of <namespace><colon><plain_label>. 00051 * As it turns out, we can ignore the namespaces, in fact we can ignore the 00052 * entire tree hierarchy, because the plain labels we are looking for will be 00053 * unique (not in general, but for this application). We do however have to be 00054 * careful to skip over the namespaces. 00055 * 00056 * In generating XML we have to be more careful, but that is easy because 00057 * everything we do is pretty canned. The only real care to take is to escape 00058 * any special chars in our payload. 00059 */ 00060 00078 static int xml_next_tag(const char *in, const char **out, 00079 const char **out_tagname, const char **end) 00080 { 00081 while (*in && *in != '<') 00082 in++; 00083 if (*in != '<') 00084 return 1; 00085 *out = ++in; 00086 if (*in == '/') 00087 in++; 00088 *out_tagname = in; /* maybe */ 00089 while (isalnum(*in) || *in == '-') 00090 in++; 00091 if (*in == ':') 00092 *out_tagname = ++in; 00093 while (*in && *in != '>') 00094 in++; 00095 if (*in != '>') 00096 return 1; 00097 *end = ++in; 00098 return 0; 00099 } 00100 00101 00102 /* xml_data_encode -- format data for xml file, escaping special characters. 00103 * 00104 * Note that we assume we are using utf8 both as input and as output! 00105 * In utf8, characters may be classed as follows: 00106 * 0xxxxxxx(2) -- 1 byte ascii char 00107 * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80 00108 * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here) 00109 * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here) 00110 * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here) 00111 * 10xxxxxx(2) -- extension byte (6 payload bits per byte) 00112 * Some values implied by the above are however illegal because they 00113 * do not represent unicode chars or are not the shortest encoding. 00114 * Actually, we can almost entirely ignore the above and just do 00115 * text processing same as for ascii text. 00116 * 00117 * XML is written with arbitrary unicode characters, except that five 00118 * characters have special meaning and so must be escaped where they 00119 * appear in payload data... which we do here. 00120 */ 00121 void xml_data_encode(struct wpabuf *buf, const char *data, int len) 00122 { 00123 int i; 00124 for (i = 0; i < len; i++) { 00125 u8 c = ((u8 *) data)[i]; 00126 if (c == '<') { 00127 wpabuf_put_str(buf, "<"); 00128 continue; 00129 } 00130 if (c == '>') { 00131 wpabuf_put_str(buf, ">"); 00132 continue; 00133 } 00134 if (c == '&') { 00135 wpabuf_put_str(buf, "&"); 00136 continue; 00137 } 00138 if (c == '\'') { 00139 wpabuf_put_str(buf, "'"); 00140 continue; 00141 } 00142 if (c == '"') { 00143 wpabuf_put_str(buf, """); 00144 continue; 00145 } 00146 /* 00147 * We could try to represent control characters using the 00148 * sequence: &#x; where x is replaced by a hex numeral, but not 00149 * clear why we would do this. 00150 */ 00151 wpabuf_put_u8(buf, c); 00152 } 00153 } 00154 00155 00156 /* xml_add_tagged_data -- format tagged data as a new xml line. 00157 * 00158 * tag must not have any special chars. 00159 * data may have special chars, which are escaped. 00160 */ 00161 void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data) 00162 { 00163 wpabuf_printf(buf, "<%s>", tag); 00164 xml_data_encode(buf, data, os_strlen(data)); 00165 wpabuf_printf(buf, "</%s>\n", tag); 00166 } 00167 00168 00169 /* A POST body looks something like (per upnp spec): 00170 * <?xml version="1.0"?> 00171 * <s:Envelope 00172 * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/" 00173 * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/"> 00174 * <s:Body> 00175 * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v"> 00176 * <argumentName>in arg value</argumentName> 00177 * other in args and their values go here, if any 00178 * </u:actionName> 00179 * </s:Body> 00180 * </s:Envelope> 00181 * 00182 * where : 00183 * s: might be some other namespace name followed by colon 00184 * u: might be some other namespace name followed by colon 00185 * actionName will be replaced according to action requested 00186 * schema following actionName will be WFA scheme instead 00187 * argumentName will be actual argument name 00188 * (in arg value) will be actual argument value 00189 */ 00190 char * xml_get_first_item(const char *doc, const char *item) 00191 { 00192 const char *match = item; 00193 int match_len = os_strlen(item); 00194 const char *tag, *tagname, *end; 00195 char *value; 00196 00197 /* 00198 * This is crude: ignore any possible tag name conflicts and go right 00199 * to the first tag of this name. This should be ok for the limited 00200 * domain of UPnP messages. 00201 */ 00202 for (;;) { 00203 if (xml_next_tag(doc, &tag, &tagname, &end)) 00204 return NULL; 00205 doc = end; 00206 if (!os_strncasecmp(tagname, match, match_len) && 00207 *tag != '/' && 00208 (tagname[match_len] == '>' || 00209 !isgraph(tagname[match_len]))) { 00210 break; 00211 } 00212 } 00213 end = doc; 00214 while (*end && *end != '<') 00215 end++; 00216 value = os_zalloc(1 + (end - doc)); 00217 if (value == NULL) 00218 return NULL; 00219 os_memcpy(value, doc, end - doc); 00220 return value; 00221 } 00222 00223 00224 struct wpabuf * xml_get_base64_item(const char *data, const char *name, 00225 enum http_reply_code *ret) 00226 { 00227 char *msg; 00228 struct wpabuf *buf; 00229 unsigned char *decoded; 00230 size_t len; 00231 00232 msg = xml_get_first_item(data, name); 00233 if (msg == NULL) { 00234 *ret = UPNP_ARG_VALUE_INVALID; 00235 return NULL; 00236 } 00237 00238 decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len); 00239 os_free(msg); 00240 if (decoded == NULL) { 00241 *ret = UPNP_OUT_OF_MEMORY; 00242 return NULL; 00243 } 00244 00245 buf = wpabuf_alloc_ext_data(decoded, len); 00246 if (buf == NULL) { 00247 os_free(decoded); 00248 *ret = UPNP_OUT_OF_MEMORY; 00249 return NULL; 00250 } 00251 return buf; 00252 }