00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 """HTTP utility code shared by clients and servers."""
00018
00019 from __future__ import absolute_import, division, with_statement
00020
00021 import logging
00022 import urllib
00023 import re
00024
00025 from tornado.util import b, ObjectDict
00026
00027
class HTTPHeaders(dict):
    """A dictionary that maintains Http-Header-Case for all keys.

    Supports multiple values per key via a pair of new methods,
    add() and get_list().  The regular dictionary interface returns a single
    value per key, with multiple values joined by a comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """
    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as ``dict``; passing a single
        HTTPHeaders instance copies it, keeping multi-valued headers intact.
        """
        # Don't pass args or kwargs to dict.__init__: it would bypass our
        # __setitem__ and the keys would not be normalized.
        dict.__init__(self)
        # Maps each normalized name to the list of its individual values.
        self._as_list = {}
        # Normalized name of the most recently added header; parse_line()
        # uses it to attach continuation lines.
        self._last_key = None
        if (len(args) == 1 and len(kwargs) == 0 and
                isinstance(args[0], HTTPHeaders)):
            # Copy constructor: replay every single value through add() so
            # that repeated headers keep their separate entries.
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style constructor.
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name, value):
        """Adds a new value for the given key."""
        norm_name = HTTPHeaders._normalize_name(name)
        self._last_key = norm_name
        if norm_name in self:
            # Bypass our override of __setitem__, which would reset the
            # per-value list in _as_list.
            dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name):
        """Returns all values for the given header as a list."""
        norm_name = HTTPHeaders._normalize_name(name)
        return self._as_list.get(norm_name, [])

    def get_all(self):
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        # items() (rather than the Python 2-only iteritems()) keeps this
        # working on both Python 2 and Python 3.
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line):
        """Updates the dictionary with a single header line.

        A line that starts with whitespace is treated as a continuation of
        the most recently added header.  ``line`` must not be empty.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # Continuation of a multi-line header: append to the last value
            # in both the per-value list and the joined dict entry.
            new_part = ' ' + line.lstrip()
            self._as_list[self._last_key][-1] += new_part
            dict.__setitem__(self, self._last_key,
                             self[self._last_key] + new_part)
        else:
            name, value = line.split(":", 1)
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers):
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]
        """
        h = cls()
        for line in headers.splitlines():
            if line:
                h.parse_line(line)
        return h

    # dict implementation overrides

    def __setitem__(self, name, value):
        # Assignment replaces any previously accumulated values.
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__setitem__(self, norm_name, value)
        self._as_list[norm_name] = [value]

    def __getitem__(self, name):
        return dict.__getitem__(self, HTTPHeaders._normalize_name(name))

    def __delitem__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__delitem__(self, norm_name)
        del self._as_list[norm_name]

    def __contains__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        return dict.__contains__(self, norm_name)

    def get(self, name, default=None):
        return dict.get(self, HTTPHeaders._normalize_name(name), default)

    def update(self, *args, **kwargs):
        # dict.update bypasses our __setitem__, so assign key by key.
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    def copy(self):
        # The default dict.copy would return a plain dict and lose _as_list.
        return HTTPHeaders(self)

    _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$')
    # Memoizes raw name -> normalized name.  Shared by all instances.
    _normalized_headers = {}
    # Upper bound on the memo size.  Header names come from parsed (possibly
    # untrusted) input, so an unbounded cache could grow without limit.
    _NORMALIZED_CACHE_MAX = 1000

    @staticmethod
    def _normalize_name(name):
        """Converts a name to Http-Header-Case.

        >>> HTTPHeaders._normalize_name("coNtent-TYPE")
        'Content-Type'
        """
        try:
            return HTTPHeaders._normalized_headers[name]
        except KeyError:
            if HTTPHeaders._NORMALIZED_HEADER_RE.match(name):
                # Already in canonical form; skip the split/join work.
                normalized = name
            else:
                normalized = "-".join([w.capitalize() for w in name.split("-")])
            cache = HTTPHeaders._normalized_headers
            if len(cache) >= HTTPHeaders._NORMALIZED_CACHE_MAX:
                # Cheap eviction: drop everything and let the hot names
                # repopulate the cache.
                cache.clear()
            cache[name] = normalized
            return normalized
00180
00181
def url_concat(url, args):
    """Concatenate url and argument dictionary regardless of whether
    url has existing query parameters.

    ``args`` is handed unchanged to ``urlencode``.  If ``args`` is empty
    or None, ``url`` is returned as-is.  Otherwise a ``?`` or ``&``
    separator is inserted unless ``url`` already ends with one.

    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    """
    if not args:
        return url
    # urlencode moved between Python 2 and Python 3; import it locally so
    # the function works on both without relying on module-level names.
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2
    # Slice instead of indexing so an empty url does not raise IndexError.
    if url[-1:] not in ('?', '&'):
        url += '&' if ('?' in url) else '?'
    return url + urlencode(args)
00194
00195
class HTTPFile(ObjectDict):
    """Represents an HTTP file.

    Its instance attributes can also be read as dictionary keys; this is
    kept for backwards compatibility.

    :ivar filename:
    :ivar body:
    :ivar content_type: Taken from the HTTP header supplied with the file.
        It can be easily forged, so it should not be trusted outright.
    """
00206
00207
def parse_multipart_form_data(boundary, data, arguments, files):
    """Parses a multipart/form-data body.

    ``boundary`` and ``data`` are byte strings.  ``arguments`` and ``files``
    are dictionaries that are updated in place: each maps a field name to a
    list, to which plain values (``arguments``) or HTTPFile objects
    (``files``) are appended.

    Malformed input is reported with ``logging.warning``.  A missing final
    boundary abandons the whole body; a malformed part is skipped.
    """
    # A boundary wrapped in double quotes is used without the quotes.
    quote = b('"')
    if boundary.startswith(quote) and boundary.endswith(quote):
        boundary = boundary[1:-1]

    dashes = b("--")
    crlf = b("\r\n")
    delimiter = dashes + boundary

    end_of_body = data.rfind(delimiter + dashes)
    if end_of_body == -1:
        logging.warning("Invalid multipart/form-data: no final boundary")
        return

    for chunk in data[:end_of_body].split(delimiter + crlf):
        if not chunk:
            continue

        # Headers and payload are separated by an empty line.
        header_end = chunk.find(crlf + crlf)
        if header_end == -1:
            logging.warning("multipart/form-data missing headers")
            continue

        part_headers = HTTPHeaders.parse(chunk[:header_end].decode("utf-8"))
        disposition, params = _parse_header(
            part_headers.get("Content-Disposition", ""))
        if disposition != "form-data" or not chunk.endswith(crlf):
            logging.warning("Invalid multipart/form-data")
            continue

        # Skip the 4-byte blank-line separator and drop the trailing CRLF.
        payload = chunk[header_end + 4:-2]

        field = params.get("name")
        if not field:
            logging.warning("multipart/form-data value missing name")
            continue

        upload_name = params.get("filename")
        if not upload_name:
            arguments.setdefault(field, []).append(payload)
            continue

        mime = part_headers.get("Content-Type", "application/unknown")
        upload = HTTPFile(filename=upload_name, body=payload,
                          content_type=mime)
        files.setdefault(field, []).append(upload)
00252
00253
00254
00255
00256
def _parseparam(s):
    """Yield the ';'-separated pieces of ``s``, stripped of whitespace.

    Each piece must be introduced by a ';'.  A ';' that falls inside a
    double-quoted section (escaped quotes ``\\"`` are not counted as
    delimiters) does not end the piece.
    """
    rest = s
    while rest[:1] == ';':
        rest = rest[1:]
        cut = rest.find(';')
        while cut > 0:
            open_quotes = rest.count('"', 0, cut) - rest.count('\\"', 0, cut)
            if open_quotes % 2 == 0:
                # Balanced quotes before this ';': it is a real separator.
                break
            cut = rest.find(';', cut + 1)
        if cut < 0:
            # No further separator: the piece runs to the end of the text.
            cut = len(rest)
        yield rest[:cut].strip()
        rest = rest[cut:]
00268
00269
def _parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Option names are lower-cased.  An option value enclosed in double
    quotes has the quotes removed and the backslash escapes ``\\\\`` and
    ``\\"`` undone.  Pieces without an ``=`` are ignored.
    """
    parts = _parseparam(';' + line)
    # Use the next() builtin rather than the Python 2-only .next() method.
    # The ';' prefix guarantees _parseparam yields at least one piece.
    key = next(parts)
    pdict = {}
    for p in parts:
        i = p.find('=')
        if i < 0:
            continue
        name = p[:i].strip().lower()
        value = p[i + 1:].strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name] = value
    return key, pdict
00289
00290
def doctests():
    """Return a doctest suite built by ``doctest.DocTestSuite()``."""
    from doctest import DocTestSuite
    # Called with no module argument, exactly as before.
    suite = DocTestSuite()
    return suite