webtest: response.py Source File

Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 import re
00003 from json import loads
00004 
00005 from webtest import forms
00006 from webtest import utils
00007 from webtest.compat import print_stderr
00008 from webtest.compat import splittype
00009 from webtest.compat import splithost
00010 from webtest.compat import PY3
00011 from webtest.compat import urlparse
00012 from webtest.compat import to_bytes
00013 
00014 from six import string_types
00015 from six import binary_type
00016 from six import text_type
00017 
00018 from bs4 import BeautifulSoup
00019 
00020 import webob
00021 
00022 
00023 class TestResponse(webob.Response):
00024     """
00025     Instances of this class are returned by
00026     :class:`~webtest.app.TestApp` methods.
00027     """
00028 
00029     request = None
00030     _forms_indexed = None
00031     parser_features = 'html.parser'
00032 
00033     @property
00034     def forms(self):
00035         """
00036         Returns a dictionary containing all the forms in the pages as
00037         :class:`~webtest.forms.Form` objects. Indexes are both in
00038         order (from zero) and by form id (if the form is given an id).
00039 
00040         See :doc:`forms` for more info on form objects.
00041         """
00042         if self._forms_indexed is None:
00043             self._parse_forms()
00044         return self._forms_indexed
00045 
00046     @property
00047     def form(self):
00048         """
00049         If there is only one form on the page, return it as a
00050         :class:`~webtest.forms.Form` object; raise a TypeError is
00051         there are no form or multiple forms.
00052         """
00053         forms_ = self.forms
00054         if not forms_:
00055             raise TypeError(
00056                 "You used response.form, but no forms exist")
00057         if 1 in forms_:
00058             # There is more than one form
00059             raise TypeError(
00060                 "You used response.form, but more than one form exists")
00061         return forms_[0]
00062 
00063     @property
00064     def testbody(self):
00065         self.decode_content()
00066         if self.charset:
00067             try:
00068                 return self.text
00069             except UnicodeDecodeError:
00070                 return self.body.decode(self.charset, 'replace')
00071         return self.body.decode('ascii', 'replace')
00072 
    # Matches one markup tag, case-insensitively and across newlines:
    # group 1 is the optional leading "/", group 2 the tag name and
    # group 3 everything up to the closing ">".
    _tag_re = re.compile(r'<(/?)([:a-z0-9_\-]*)(.*?)>', re.S | re.I)
00074 
00075     def _parse_forms(self):
00076         forms_ = self._forms_indexed = {}
00077         form_texts = [str(f) for f in self.html('form')]
00078         for i, text in enumerate(form_texts):
00079             form = forms.Form(self, text, self.parser_features)
00080             forms_[i] = form
00081             if form.id:
00082                 forms_[form.id] = form
00083 
00084     def _follow(self, **kw):
00085         location = self.headers['location']
00086         abslocation = urlparse.urljoin(self.request.url, location)
00087         type_, rest = splittype(abslocation)
00088         host, path = splithost(rest)
00089         # @@: We should test that it's not a remote redirect
00090         return self.test_app.get(abslocation, **kw)
00091 
00092     def follow(self, **kw):
00093         """
00094         If this response is a redirect, follow that redirect.  It is an
00095         error if it is not a redirect response. Any keyword
00096         arguments are passed to :class:`webtest.app.TestApp.get`. Returns
00097         another :class:`TestResponse` object.
00098         """
00099         assert 300 <= self.status_int < 400, (
00100             "You can only follow redirect responses (not %s)"
00101             % self.status)
00102         return self._follow(**kw)
00103 
00104     def maybe_follow(self, **kw):
00105         """
00106         Follow all redirects. If this response is not a redirect, do nothing.
00107         Any keyword arguments are passed to :class:`webtest.app.TestApp.get`.
00108         Returns another :class:`TestResponse` object.
00109         """
00110         remaining_redirects = 100  # infinite loops protection
00111         response = self
00112 
00113         while 300 <= response.status_int < 400 and remaining_redirects:
00114             response = response._follow(**kw)
00115             remaining_redirects -= 1
00116 
00117         assert remaining_redirects > 0, "redirects chain looks infinite"
00118         return response
00119 
00120     def click(self, description=None, linkid=None, href=None,
00121               index=None, verbose=False,
00122               extra_environ=None):
00123         """
00124         Click the link as described.  Each of ``description``,
00125         ``linkid``, and ``url`` are *patterns*, meaning that they are
00126         either strings (regular expressions), compiled regular
00127         expressions (objects with a ``search`` method), or callables
00128         returning true or false.
00129 
00130         All the given patterns are ANDed together:
00131 
00132         * ``description`` is a pattern that matches the contents of the
00133           anchor (HTML and all -- everything between ``<a...>`` and
00134           ``</a>``)
00135 
00136         * ``linkid`` is a pattern that matches the ``id`` attribute of
00137           the anchor.  It will receive the empty string if no id is
00138           given.
00139 
00140         * ``href`` is a pattern that matches the ``href`` of the anchor;
00141           the literal content of that attribute, not the fully qualified
00142           attribute.
00143 
00144         If more than one link matches, then the ``index`` link is
00145         followed.  If ``index`` is not given and more than one link
00146         matches, or if no link matches, then ``IndexError`` will be
00147         raised.
00148 
00149         If you give ``verbose`` then messages will be printed about
00150         each link, and why it does or doesn't match.  If you use
00151         ``app.click(verbose=True)`` you'll see a list of all the
00152         links.
00153 
00154         You can use multiple criteria to essentially assert multiple
00155         aspects about the link, e.g., where the link's destination is.
00156         """
00157         found_html, found_desc, found_attrs = self._find_element(
00158             tag='a', href_attr='href',
00159             href_extract=None,
00160             content=description,
00161             id=linkid,
00162             href_pattern=href,
00163             index=index, verbose=verbose)
00164         return self.goto(str(found_attrs['uri']), extra_environ=extra_environ)
00165 
00166     def clickbutton(self, description=None, buttonid=None, href=None,
00167                     index=None, verbose=False):
00168         """
00169         Like :meth:`~webtest.response.TestResponse.click`, except looks
00170         for link-like buttons.
00171         This kind of button should look like
00172         ``<button onclick="...location.href='url'...">``.
00173         """
00174         found_html, found_desc, found_attrs = self._find_element(
00175             tag='button', href_attr='onclick',
00176             href_extract=re.compile(r"location\.href='(.*?)'"),
00177             content=description,
00178             id=buttonid,
00179             href_pattern=href,
00180             index=index, verbose=verbose)
00181         return self.goto(str(found_attrs['uri']))
00182 
00183     def _find_element(self, tag, href_attr, href_extract,
00184                       content, id,
00185                       href_pattern,
00186                       index, verbose):
00187         content_pat = utils.make_pattern(content)
00188         id_pat = utils.make_pattern(id)
00189         href_pat = utils.make_pattern(href_pattern)
00190 
00191         def printlog(s):
00192             if verbose:
00193                 print(s)
00194 
00195         found_links = []
00196         total_links = 0
00197         for element in self.html.find_all(tag):
00198             el_html = str(element)
00199             el_content = element.decode_contents()
00200             attrs = element
00201             if verbose:
00202                 printlog('Element: %r' % el_html)
00203             if not attrs.get(href_attr):
00204                 printlog('  Skipped: no %s attribute' % href_attr)
00205                 continue
00206             el_href = attrs[href_attr]
00207             if href_extract:
00208                 m = href_extract.search(el_href)
00209                 if not m:
00210                     printlog("  Skipped: doesn't match extract pattern")
00211                     continue
00212                 el_href = m.group(1)
00213             attrs['uri'] = el_href
00214             if el_href.startswith('#'):
00215                 printlog('  Skipped: only internal fragment href')
00216                 continue
00217             if el_href.startswith('javascript:'):
00218                 printlog('  Skipped: cannot follow javascript:')
00219                 continue
00220             total_links += 1
00221             if content_pat and not content_pat(el_content):
00222                 printlog("  Skipped: doesn't match description")
00223                 continue
00224             if id_pat and not id_pat(attrs.get('id', '')):
00225                 printlog("  Skipped: doesn't match id")
00226                 continue
00227             if href_pat and not href_pat(el_href):
00228                 printlog("  Skipped: doesn't match href")
00229                 continue
00230             printlog("  Accepted")
00231             found_links.append((el_html, el_content, attrs))
00232         if not found_links:
00233             raise IndexError(
00234                 "No matching elements found (from %s possible)"
00235                 % total_links)
00236         if index is None:
00237             if len(found_links) > 1:
00238                 raise IndexError(
00239                     "Multiple links match: %s"
00240                     % ', '.join([repr(anc) for anc, d, attr in found_links]))
00241             found_link = found_links[0]
00242         else:
00243             try:
00244                 found_link = found_links[index]
00245             except IndexError:
00246                 raise IndexError(
00247                     "Only %s (out of %s) links match; index %s out of range"
00248                     % (len(found_links), total_links, index))
00249         return found_link
00250 
00251     def goto(self, href, method='get', **args):
00252         """
00253         Go to the (potentially relative) link ``href``, using the
00254         given method (``'get'`` or ``'post'``) and any extra arguments
00255         you want to pass to the :meth:`webtest.app.TestApp.get` or
00256         :meth:`webtest.app.TestApp.post` methods.
00257 
00258         All hostnames and schemes will be ignored.
00259         """
00260         scheme, host, path, query, fragment = urlparse.urlsplit(href)
00261         # We
00262         scheme = host = fragment = ''
00263         href = urlparse.urlunsplit((scheme, host, path, query, fragment))
00264         href = urlparse.urljoin(self.request.url, href)
00265         method = method.lower()
00266         assert method in ('get', 'post'), (
00267             'Only "get" or "post" are allowed for method (you gave %r)'
00268             % method)
00269 
00270         # encode unicode strings for the outside world
00271         if not PY3 and getattr(self, '_use_unicode', False):
00272             def to_str(s):
00273                 if isinstance(s, text_type):
00274                     return s.encode(self.charset)
00275                 return s
00276 
00277             href = to_str(href)
00278 
00279             if 'params' in args:
00280                 args['params'] = [tuple(map(to_str, p))
00281                                   for p in args['params']]
00282 
00283             if 'upload_files' in args:
00284                 args['upload_files'] = [map(to_str, f)
00285                                         for f in args['upload_files']]
00286 
00287             if 'content_type' in args:
00288                 args['content_type'] = to_str(args['content_type'])
00289 
00290         if method == 'get':
00291             method = self.test_app.get
00292         else:
00293             method = self.test_app.post
00294         return method(href, **args)
00295 
    # Bytes pattern matching a run of spaces, newlines, carriage returns
    # and tabs; used to normalize whitespace in the raw body.
    _normal_body_regex = re.compile(to_bytes(r'[ \n\r\t]+'))
00297 
00298     @property
00299     def normal_body(self):
00300         """
00301         Return the whitespace-normalized body
00302         """
00303         if getattr(self, '_normal_body', None) is None:
00304             self._normal_body = self._normal_body_regex.sub(b' ', self.body)
00305         return self._normal_body
00306 
    # Text pattern matching a run of spaces, newlines, carriage returns
    # and tabs; used to normalize whitespace in the decoded body.
    _unicode_normal_body_regex = re.compile('[ \\n\\r\\t]+')
00308 
00309     @property
00310     def unicode_normal_body(self):
00311         """
00312         Return the whitespace-normalized body, as unicode
00313         """
00314         if not self.charset:
00315             raise AttributeError(
00316                 ("You cannot access Response.unicode_normal_body "
00317                  "unless charset is set"))
00318         if getattr(self, '_unicode_normal_body', None) is None:
00319             self._unicode_normal_body = self._unicode_normal_body_regex.sub(
00320                 ' ', self.testbody)
00321         return self._unicode_normal_body
00322 
00323     def __contains__(self, s):
00324         """
00325         A response 'contains' a string if it is present in the body
00326         of the response.  Whitespace is normalized when searching
00327         for a string.
00328         """
00329         if not self.charset and isinstance(s, text_type):
00330             s = s.encode('utf8')
00331         if isinstance(s, binary_type):
00332             return s in self.body or s in self.normal_body
00333         return s in self.testbody or s in self.unicode_normal_body
00334 
00335     def mustcontain(self, *strings, **kw):
00336         """mustcontain(*strings, no=[])
00337 
00338         Assert that the response contains all of the strings passed
00339         in as arguments.
00340 
00341         Equivalent to::
00342 
00343             assert string in res
00344 
00345         Can take a `no` keyword argument that can be a string or a
00346         list of strings which must not be present in the response.
00347         """
00348         if 'no' in kw:
00349             no = kw['no']
00350             del kw['no']
00351             if isinstance(no, string_types):
00352                 no = [no]
00353         else:
00354             no = []
00355         if kw:
00356             raise TypeError(
00357                 "The only keyword argument allowed is 'no'")
00358         for s in strings:
00359             if not s in self:
00360                 print_stderr("Actual response (no %r):" % s)
00361                 print_stderr(str(self))
00362                 raise IndexError(
00363                     "Body does not contain string %r" % s)
00364         for no_s in no:
00365             if no_s in self:
00366                 print_stderr("Actual response (has %r)" % no_s)
00367                 print_stderr(str(self))
00368                 raise IndexError(
00369                     "Body contains bad string %r" % no_s)
00370 
00371     def __str__(self):
00372         simple_body = str('\n').join([l for l in self.testbody.splitlines()
00373                                      if l.strip()])
00374         headers = [(n.title(), v)
00375                    for n, v in self.headerlist
00376                    if n.lower() != 'content-length']
00377         headers.sort()
00378         output = str('Response: %s\n%s\n%s') % (
00379             self.status,
00380             str('\n').join([str('%s: %s') % (n, v) for n, v in headers]),
00381             simple_body)
00382         if not PY3 and isinstance(output, text_type):
00383             output = output.encode(self.charset or 'utf8', 'replace')
00384         return output
00385 
00386     def __unicode__(self):
00387         output = str(self)
00388         if PY3:
00389             return output
00390         return output.decode(self.charset or 'utf8', 'replace')
00391 
00392     def __repr__(self):
00393         # Specifically intended for doctests
00394         if self.content_type:
00395             ct = ' %s' % self.content_type
00396         else:
00397             ct = ''
00398         if self.body:
00399             br = repr(self.body)
00400             if len(br) > 18:
00401                 br = br[:10] + '...' + br[-5:]
00402                 br += '/%s' % len(self.body)
00403             body = ' body=%s' % br
00404         else:
00405             body = ' no body'
00406         if self.location:
00407             location = ' location: %s' % self.location
00408         else:
00409             location = ''
00410         return ('<' + self.status + ct + location + body + '>')
00411 
00412     @property
00413     def html(self):
00414         """
00415         Returns the response as a `BeautifulSoup
00416         <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_
00417         object.
00418 
00419         Only works with HTML responses; other content-types raise
00420         AttributeError.
00421         """
00422         if 'html' not in self.content_type:
00423             raise AttributeError(
00424                 "Not an HTML response body (content-type: %s)"
00425                 % self.content_type)
00426         soup = BeautifulSoup(self.testbody, self.parser_features)
00427         return soup
00428 
00429     @property
00430     def xml(self):
00431         """
00432         Returns the response as an `ElementTree
00433         <http://python.org/doc/current/lib/module-xml.etree.ElementTree.html>`_
00434         object.
00435 
00436         Only works with XML responses; other content-types raise
00437         AttributeError
00438         """
00439         if 'xml' not in self.content_type:
00440             raise AttributeError(
00441                 "Not an XML response body (content-type: %s)"
00442                 % self.content_type)
00443         try:
00444             from xml.etree import ElementTree
00445         except ImportError:  # pragma: no cover
00446             try:
00447                 import ElementTree
00448             except ImportError:
00449                 try:
00450                     from elementtree import ElementTree  # NOQA
00451                 except ImportError:
00452                     raise ImportError(
00453                         ("You must have ElementTree installed "
00454                          "(or use Python 2.5) to use response.xml"))
00455         # ElementTree can't parse unicode => use `body` instead of `testbody`
00456         return ElementTree.XML(self.body)
00457 
00458     @property
00459     def lxml(self):
00460         """
00461         Returns the response as an `lxml object
00462         <http://codespeak.net/lxml/>`_.  You must have lxml installed
00463         to use this.
00464 
00465         If this is an HTML response and you have lxml 2.x installed,
00466         then an ``lxml.html.HTML`` object will be returned; if you
00467         have an earlier version of lxml then a ``lxml.HTML`` object
00468         will be returned.
00469         """
00470         if 'html' not in self.content_type and \
00471            'xml' not in self.content_type:
00472             raise AttributeError(
00473                 "Not an XML or HTML response body (content-type: %s)"
00474                 % self.content_type)
00475         try:
00476             from lxml import etree
00477         except ImportError:  # pragma: no cover
00478             raise ImportError(
00479                 "You must have lxml installed to use response.lxml")
00480         try:
00481             from lxml.html import fromstring
00482         except ImportError:  # pragma: no cover
00483             fromstring = etree.HTML
00484         ## FIXME: would be nice to set xml:base, in some fashion
00485         if self.content_type == 'text/html':
00486             return fromstring(self.testbody, base_url=self.request.url)
00487         else:
00488             return etree.XML(self.testbody, base_url=self.request.url)
00489 
00490     @property
00491     def json(self):
00492         """
00493         Return the response as a JSON response.  You must have `simplejson
00494         <http://goo.gl/B9g6s>`_ installed to use this, or be using a Python
00495         version with the json module.
00496 
00497         The content type must be one of json type to use this.
00498         """
00499         if not self.content_type.endswith(('+json', '/json')):
00500             raise AttributeError(
00501                 "Not a JSON response body (content-type: %s)"
00502                 % self.content_type)
00503         return loads(self.testbody)
00504 
00505     @property
00506     def pyquery(self):
00507         """
00508         Returns the response as a `PyQuery <http://pyquery.org/>`_ object.
00509 
00510         Only works with HTML and XML responses; other content-types raise
00511         AttributeError.
00512         """
00513         if 'html' not in self.content_type and 'xml' not in self.content_type:
00514             raise AttributeError(
00515                 "Not an HTML or XML response body (content-type: %s)"
00516                 % self.content_type)
00517         try:
00518             from pyquery import PyQuery
00519         except ImportError:  # pragma: no cover
00520             raise ImportError(
00521                 "You must have PyQuery installed to use response.pyquery")
00522         d = PyQuery(self.testbody)
00523         return d
00524 
00525     def showbrowser(self):
00526         """
00527         Show this response in a browser window (for debugging purposes,
00528         when it's hard to read the HTML).
00529         """
00530         import webbrowser
00531         import tempfile
00532         f = tempfile.NamedTemporaryFile(prefix='webtest-page',
00533                                         suffix='.html')
00534         name = f.name
00535         f.close()
00536         f = open(name, 'w')
00537         if PY3:
00538             f.write(self.body.decode(self.charset or 'ascii', 'replace'))
00539         else:
00540             f.write(self.body)
00541         f.close()
00542         if name[0] != '/':  # pragma: no cover
00543             # windows ...
00544             url = 'file:///' + name
00545         else:
00546             url = 'file://' + name
00547         webbrowser.open_new(url)