00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 r"""
00028 Check Python source code formatting, according to PEP 8.
00029
00030 For usage and a list of options, try this:
00031 $ python pep8.py -h
00032
00033 This program and its regression test suite live here:
00034 http://github.com/jcrocholl/pep8
00035
00036 Groups of errors and warnings:
00037 E errors
00038 W warnings
00039 100 indentation
00040 200 whitespace
00041 300 blank lines
00042 400 imports
00043 500 line length
00044 600 deprecation
00045 700 statements
00046 900 syntax error
00047 """
00048 from __future__ import with_statement
00049
00050 import os
00051 import sys
00052 import re
00053 import time
00054 import inspect
00055 import keyword
00056 import tokenize
00057 from optparse import OptionParser
00058 from fnmatch import fnmatch
00059 try:
00060 from configparser import RawConfigParser
00061 from io import TextIOWrapper
00062 except ImportError:
00063 from ConfigParser import RawConfigParser
00064
__version__ = '1.6.2'

# Comma-separated patterns of paths that are skipped by default.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
# Error codes that are ignored unless the user overrides the ignore list.
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704'
try:
    if sys.platform == 'win32':
        USER_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        # XDG_CONFIG_HOME takes precedence over ~/.config when set.
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pep8'
        )
except ImportError:
    # expanduser may fail in restricted environments; run without a
    # user-level configuration file in that case.
    USER_CONFIG = None

# Per-project configuration file names, searched from the project root.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Templates for reporting; 'pylint' mimics pylint's parseable output.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# Flag for compile() to get an AST instead of executing the code.
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# 'print' is treated as a keyword for Python 2 compatibility.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
# Operators around which whitespace is optional (E226/E227/E228).
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# Operators that always require surrounding whitespace (E225).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
# Token types that do not contribute to the logical line.
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])

SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')


# Detect whether this Python's tokenizer emits a comment followed by NL
# as a single '#\n' COMMENT token (behavior differs across versions).
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
00130
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only. The
    second-most popular way is with tabs only. Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively. When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces. When using -tt
    these warnings become errors. These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    # Every character of the indent must match the file's dominant
    # indent character; the first deviation is reported.
    leading = INDENT_REGEX.match(physical_line).group(1)
    for position, current_char in enumerate(leading):
        if current_char != indent_char:
            return position, "E101 indentation contains mixed spaces and tabs"
00149
def tabs_obsolete(physical_line):
    r"""For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    # Report at the column of the first tab in the indentation, if any.
    leading = INDENT_REGEX.match(physical_line).group(1)
    tab_position = leading.find('\t')
    if tab_position != -1:
        return tab_position, "W191 indentation contains tabs"
00159
00160
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank, for
    easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator (and a stray form feed) first so that it
    # is not mistaken for trailing whitespace.
    line = physical_line.rstrip('\n').rstrip('\r').rstrip('\x0c')
    without_trailing = line.rstrip(' \t\v')
    if without_trailing == line:
        return None
    if without_trailing:
        return len(without_trailing), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
00180
00181
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is of interest here.
    if line_number != total_lines:
        return
    stripped = physical_line.rstrip()
    if not stripped:
        return 0, "W391 blank line at end of file"
    if stripped == physical_line:
        # No line terminator at all on the final line.
        return len(physical_line), "W292 no newline at end of file"
00196
00197
def maximum_line_length(physical_line, max_line_length, multiline):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side. The default wrapping on such devices looks
    ugly. Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length <= max_line_length or noqa(line):
        return None
    # Tolerate long lines that consist of a single long token (e.g. a
    # URL inside a multiline string) or a one-word comment, as long as
    # the part before the token would fit.
    chunks = line.split()
    lone_token = len(chunks) == 1 and multiline
    lone_comment = len(chunks) == 2 and chunks[0] == '#'
    if ((lone_token or lone_comment) and
            length - len(chunks[-1]) < max_line_length - 7):
        return None
    if hasattr(line, 'decode'):
        # Python 2: count characters of the decoded text, not bytes.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
00229
00230
00231
00232
00233
00234
00235
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical, previous_indent_level):
    r"""Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions. Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # A decorator must be immediately followed by the decorated def.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif blank_lines > 2 or (indent_level and blank_lines == 2):
        # More than 2 blank lines at top level, or 2 inside a suite.
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif logical_line.startswith(('def ', 'class ', '@')):
        if indent_level:
            # Nested definition: one blank line expected, unless this is
            # the first statement of the suite or follows a docstring.
            if not (blank_before or previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)):
                yield 0, "E301 expected 1 blank line, found 0"
        elif blank_before != 2:
            yield 0, "E302 expected 2 blank lines, found %d" % blank_before
00271
00272
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for hit in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = hit.group()
        char = text.strip()
        offset = hit.start()
        if text == char + ' ':
            # Space directly after an opening bracket.
            yield offset + 1, "E201 whitespace after '%s'" % char
        elif logical_line[offset - 1] != ',':
            # Space before a closer or separator (allow ", )" though).
            code = ('E202' if char in '}])' else 'E203')
            yield offset, "%s whitespace before '%s'" % (code, char)
00303
00304
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for hit in KEYWORD_REGEX.finditer(logical_line):
        before = hit.group(1)
        after = hit.group(2)

        # Check the gap on each side of the keyword independently: a tab
        # is always reported; otherwise more than one space is reported.
        if '\t' in before:
            yield hit.start(1), "E274 tab before keyword"
        elif len(before) > 1:
            yield hit.start(1), "E272 multiple spaces before keyword"

        if '\t' in after:
            yield hit.start(2), "E273 tab after keyword"
        elif len(after) > 1:
            yield hit.start(2), "E271 multiple spaces after keyword"
00326
00327
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    text = logical_line
    for position, char in enumerate(text[:-1]):
        if char not in ',;:':
            continue
        next_char = text[position + 1]
        if next_char in WHITESPACE:
            continue
        before = text[:position]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            # Colon inside a slice subscript: no space required.
            continue
        if char == ',' and next_char == ')':
            # One-element tuple such as (3,).
            continue
        yield position, "E231 missing whitespace after '%s'" % char
00352
00353
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111: a = 1
    E114: # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # Comment-only lines get the E114/E115/E116 variants of E111/E112/E113.
    if logical_line:
        code_offset, suffix = 0, ""
    else:
        code_offset, suffix = 3, " (comment)"
    if indent_level % 4:
        yield 0, "E11%d indentation is not a multiple of four%s" % (
            1 + code_offset, suffix)
    expect_indent = previous_logical.endswith(':')
    if expect_indent and indent_level <= previous_indent_level:
        yield 0, "E11%d expected an indented block%s" % (
            2 + code_offset, suffix)
    elif indent_level > previous_indent_level and not expect_indent:
        yield 0, "E11%d unexpected indentation%s" % (3 + code_offset, suffix)
00383
00384
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        # Single physical line (or suppressed): nothing to check.
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # With a tab indent_char, an 8-space hang is also acceptable.
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each physical row
    parens = [0] * nrows
    # relative indent of each physical row within the logical line
    rel_indent = [0] * nrows
    # for each bracket depth, the rows on which a bracket was opened
    open_rows = [[0]]
    # for each bracket depth, the established hanging indentation
    hangs = [None]
    # columns at which a visual indent would be acceptable
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, the column of the established visual indent (0 = none)
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # Detect the first token of a new physical row.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # Record the initial indent relative to the logical line.
            rel_indent[row] = expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent a valid hang relative to an opening row?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                # A hanging indent was already established at this depth;
                # continuation lines must stick to it.
                hanging_indent = (hang == hangs[depth])

            # Is there any chance of visual indent at this column?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for a visual indent.
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # Closing bracket matches indentation of opening line.
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with a matching one from a
                # previous line.
                pass
            else:
                # Indent is broken; pick the most specific error code.
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # Look for visual indenting: the first token after an opening
        # bracket on the same row establishes the visual indent column.
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for "if" because len("if (") equals len("elif").
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # Allow lining up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    if indent_next and expand_indent(line) == indent_level + 4:
        # The logical line opens an indented block: its last continuation
        # line must not look like the first line of that block.
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
00582
00583
00584 def whitespace_before_parameters(logical_line, tokens):
00585 r"""Avoid extraneous whitespace.
00586
00587 Avoid extraneous whitespace in the following situations:
00588 - before the open parenthesis that starts the argument list of a
00589 function call.
00590 - before the open parenthesis that starts an indexing or slicing.
00591
00592 Okay: spam(1)
00593 E211: spam (1)
00594
00595 Okay: dict['key'] = list[index]
00596 E211: dict ['key'] = list[index]
00597 E211: dict['key'] = list [index]
00598 """
00599 prev_type, prev_text, __, prev_end, __ = tokens[0]
00600 for index in range(1, len(tokens)):
00601 token_type, text, start, end, __ = tokens[index]
00602 if (token_type == tokenize.OP and
00603 text in '([' and
00604 start != prev_end and
00605 (prev_type == tokenize.NAME or prev_text in '}])') and
00606
00607 (index < 2 or tokens[index - 2][1] != 'class') and
00608
00609 not keyword.iskeyword(prev_text)):
00610 yield prev_end, "E211 whitespace before '%s'" % text
00611 prev_type = token_type
00612 prev_text = text
00613 prev_end = end
00614
00615
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for hit in OPERATOR_REGEX.finditer(logical_line):
        before = hit.group(1)
        after = hit.group(2)

        # A tab on either side is always an error; otherwise flag runs of
        # more than one space.
        if '\t' in before:
            yield hit.start(1), "E223 tab before operator"
        elif len(before) > 1:
            yield hit.start(1), "E221 multiple spaces before operator"

        if '\t' in after:
            yield hit.start(2), "E224 tab after operator"
        elif len(after) > 1:
            yield hit.start(2), "E222 multiple spaces after operator"
00637
00638
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # need_space is a tri-state: False (no check pending), True (space
    # required), or a tuple (position, had_leading_space) for operators
    # where surrounding space is optional but must be symmetric.
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space after the operator.
                if need_space is not True and not need_space[1]:
                    # Optional-space operator with space after but not
                    # before: asymmetric, so E225.
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate "<>" (deprecated not-equal) and "->"
                # (annotation arrow) split across two OP tokens.
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found.
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    # No space on either side of an optional-space
                    # operator: report by operator category.
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check for negative number, decorator, or keyword
                # argument: unary context requires no surrounding space.
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that the
                # trailing space matches the opening space.
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found.
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
00729
00730
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for hit in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        position = hit.start() + 1
        separator = hit.group()[0]
        if '\t' in hit.group():
            yield position, "E242 tab after '%s'" % separator
        else:
            yield position, "E241 multiple spaces after '%s'" % separator
00747
00748
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    parens = 0
    # no_space is set when an '=' was seen: the next token must directly
    # follow it (no gap) or E251 is reported.
    no_space = False
    prev_end = None
    # PEP 8 *does* want spaces around '=' for an annotated parameter
    # (def foo(arg: int = 42)), so track whether we are inside one.
    annotated_func_arg = False
    in_def = logical_line.startswith('def')
    message = "E251 unexpected spaces around keyword / parameter equals"
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            no_space = False
            if start != prev_end:
                # Gap after the '='.
                yield (prev_end, message)
        if token_type == tokenize.OP:
            if text == '(':
                parens += 1
            elif text == ')':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                # Colon at parameter-list depth starts an annotation.
                annotated_func_arg = True
            elif parens and text == ',' and parens == 1:
                # Comma at parameter-list depth ends the current argument.
                annotated_func_arg = False
            elif parens and text == '=' and not annotated_func_arg:
                no_space = True
                if start != prev_end:
                    # Gap before the '='.
                    yield (prev_end, message)
            if not parens:
                annotated_func_arg = False

        prev_end = end
00796
00797
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Non-blank text before the '#' means this is an inline comment.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            # symbol is the run of '#' (plus any text glued to it);
            # bad_prefix is the first offending character, if any.
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                # '#!' is allowed only as a shebang on the first line.
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
00837
00838
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    # Only plain "import a, b" is flagged; "from x import a, b" is fine.
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    if comma > -1 and ';' not in logical_line[:comma]:
        yield comma, "E401 multiple imports on one line"
00856
00857
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Imports are always put at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    Okay: try:\n    import x\nexcept:\n    pass\nelse:\n    pass\nimport y
    Okay: try:\n    import x\nexcept:\n    pass\nfinally:\n    pass\nimport y
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n    import os
    """
    def is_string_literal(line):
        # Skip a string prefix (u/b, optionally followed by r) and then
        # check for an opening quote.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    allowed_try_keywords = ('try', 'except', 'else', 'finally')

    if indent_level:  # Allow imports in conditional statements or functions
        return
    if not logical_line:  # Empty line (only comments on this line)
        return
    if noqa:
        return
    line = logical_line
    if line.startswith('import ') or line.startswith('from '):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif any(line.startswith(kw) for kw in allowed_try_keywords):
        # Allow certain keywords intermixed with imports in order to
        # support conditional importing.
        return
    elif is_string_literal(line):
        # The first literal is a "docstring", allow it. Otherwise, this
        # is a piece of code.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
00906
00907
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    # First pass: look at every ':' that is not the last character.
    found = line.find(':')
    while -1 < found < last_char:
        before = line[:found]
        # Only a ':' outside of any open bracket introduces a suite
        # (rules out dict literals, slices and annotations in parens).
        if ((before.count('{') <= before.count('}') and
                before.count('[') <= before.count(']') and
                before.count('(') <= before.count(')'))):
            lambda_kw = LAMBDA_REGEX.search(before)
            if lambda_kw:
                # Yes, but is it a lambda definition assigned to a name?
                before = line[:lambda_kw.start()].rstrip()
                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if before.startswith('def '):
                yield 0, "E704 multiple statements on one line (def)"
            else:
                yield found, "E701 multiple statements on one line (colon)"
        found = line.find(':', found + 1)
    # Second pass: every ';' is either a statement separator (E702) or a
    # useless trailing semicolon (E703).
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
00963
00964
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    prev_start = prev_end = parens = 0
    comment = False
    backslash = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # A backslash at the end of a comment is not a line join.
            comment = True
        if start[0] != prev_start and parens and backslash and not comment:
            # New physical row, inside brackets, after a backslash: the
            # backslash was unnecessary.
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # Token ends on a new physical row: remember whether that row
            # ends with a backslash continuation.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
01002
01003
def break_around_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)

    Okay: (width == 0 +\n height == 0)
    Okay: foo(\n -x)
    Okay: foo(x\n [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n -y)
    Okay: foo(x, # comment\n -y)
    """
    def _is_binary(tok_type, tok_text):
        # Operators in the exclusion string are punctuation or unary-ish;
        # '%' is excluded too (commonly a split string-formatting literal).
        if tok_type != tokenize.OP and tok_text not in ('and', 'or'):
            return False
        return tok_text not in "()[]{},:.;@=%"

    saw_line_break = False
    after_opener = True
    for tok_type, tok_text, tok_start, _end, _line in tokens:
        if tok_type == tokenize.COMMENT:
            continue
        if tok_type != tokenize.STRING and ('\n' in tok_text or
                                            '\r' in tok_text):
            saw_line_break = True
            continue
        if (saw_line_break and not after_opener and
                _is_binary(tok_type, tok_text)):
            yield tok_start, "W503 line break before binary operator"
        # After an opening bracket/comma/semicolon the next operator is
        # unary, so it must not be flagged.
        after_opener = tok_text in '([{,;'
        saw_line_break = False
01041
01042
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value. The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    if noqa:
        return
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    singleton = match.group(1) or match.group(3)
    is_equality = match.group(2) == '=='

    msg = "'if cond is %s:'" % (('' if is_equality else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # "== True" and "!= False" are both truth tests; suggest the
        # plain boolean form for them.
        truthy = (is_equality == (singleton == 'True'))
        msg += " or 'if %scond:'" % ('' if truthy else 'not ')
    yield match.start(2), ("%s comparison to %s should be %s" %
                           (code, singleton, msg))
01075
01076
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n pass
    E714: if not X is Y:\n pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not match:
        return
    pos = match.start(1)
    # "not X in Y" is almost always meant as "X not in Y";
    # likewise "not X is Y" should be "X is not Y".
    if match.group(2) == 'in':
        yield pos, "E713 test for membership should be 'not in'"
    else:
        yield pos, "E714 test for object identity should be 'is not'"
01096
01097
def comparison_type(logical_line, noqa):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    if noqa:
        return
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    inst = match.group(1)
    # Comparing the type of one variable against the type of another (last
    # Okay example) is allowed; only flag comparisons against literals.
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return  # Allow comparison for new type
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
01119
01120
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

    Okay: if "alph" in d:\n print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    offset = logical_line.find('.has_key(')
    if offset != -1:
        yield offset, "W601 .has_key() is deprecated, use 'in'"
01130
01131
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    # The three-argument re-raise form ("raise exc, val, tb") has no simple
    # one-statement replacement, so it is deliberately not flagged here.
    if not match or RERAISE_COMMA_REGEX.match(logical_line):
        return
    yield match.end() - 1, "W602 deprecated form of raising exception"
01143
01144
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    offset = logical_line.find('<>')
    if offset != -1:
        yield offset, "W603 '<>' is deprecated, use '!='"
01156
01157
def python_3000_backticks(logical_line):
    r"""Backticks are removed in Python 3: use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    offset = logical_line.find('`')
    if offset != -1:
        yield offset, "W604 backticks are deprecated, use 'repr()'"
01167
01168
01169
01170
01171
01172
01173
# Version-specific helpers.  On Python 2 text and bytes are the same type,
# so '' == ''.encode() is true there and false on Python 3.
if '' == ''.encode():
    # Python 2: read text with universal newlines.

    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as f:
            return f.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3: honour the PEP 263 coding declaration when reading.

    def readlines(filename):
        """Read the source code."""
        try:
            with open(filename, 'rb') as f:
                (coding, lines) = tokenize.detect_encoding(f.readline)
                f = TextIOWrapper(f, coding, line_buffering=True)
                return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if the file encoding is improperly declared.
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Search for "# noqa" / "# nopep8" markers that silence reporting on a line.
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
01200
01201
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('   \t')
    8
    >>> expand_indent('        \t')
    16
    """
    if '\t' not in line:
        # Fast path: spaces only, indentation is just the prefix length.
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == '\t':
            # Advance to the next multiple of 8.
            width = (width // 8 + 1) * 8
        elif char == ' ':
            width += 1
        else:
            break
    return width
01227
01228
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    quote = text[-1]
    # Skip any string prefix (e.g. u, b or r) by finding the opening quote.
    begin = text.index(quote) + 1
    finish = len(text) - 1
    # Triple-quoted strings keep three quote characters on each side.
    if text[-3:] in ('"""', "'''"):
        begin += 2
        finish -= 2
    return text[:begin] + 'x' * (finish - begin) + text[finish:]
01247
01248
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # For each file of the diff, map the filename to the set of row
    # numbers (in the new file) that the diff touches.
    changed = {}
    path = remaining = None
    for line in diff.splitlines():
        if remaining:
            # Inside a hunk: removed lines ('-') do not exist in the new
            # file; every other line consumes one of the hunk's rows.
            if not line.startswith('-'):
                remaining -= 1
            continue
        if line.startswith('@@ '):
            hunk = HUNK_REGEX.match(line)
            row, remaining = [int(group or '1') for group in hunk.groups()]
            changed[path].update(range(row, row + remaining))
        elif line.startswith('+++'):
            path = line[4:].split('\t', 1)[0]
            if path.startswith('b/'):
                path = path[2:]
            changed[path] = set()
    return dict([(os.path.join(parent, name), rows)
                 for (name, rows) in changed.items()
                 if rows and filename_match(name, patterns)])
01272
01273
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    if not value:
        return []
    if isinstance(value, list):
        # Already a parsed list (e.g. supplied programmatically).
        return value
    paths = []
    for item in value.split(','):
        item = item.strip()
        if '/' in item:
            # Only entries that look like paths are made absolute.
            item = os.path.abspath(os.path.join(parent, item))
        paths.append(item.rstrip('/'))
    return paths
01290
01291
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
01300
01301
def _is_eol_token(token):
    # A token ends a physical line when it is a NEWLINE/NL token, or when
    # everything after it on its physical line is just an escaped newline.
    return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
if COMMENT_WITH_NL:
    # NOTE(review): COMMENT_WITH_NL is defined outside this view;
    # presumably it is set when the tokenizer does not emit NL after an
    # own-line comment, so the comment itself ends the line -- verify.
    def _is_eol_token(token, _eol_token=_is_eol_token):
        return _eol_token(token) or (token[0] == tokenize.COMMENT and
                                     token[1] == token[4])
01308
01309 ##############################################################################
01310 # Framework to run all checks
01311 ##############################################################################
01312
01313
# Registry of all known checks, keyed by the kind of first argument their
# entry point accepts; populated by register_check()/init_checks_registry().
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
01315
01316
def register_check(check, codes=None):
    """Register a new check object.

    A function is registered under the kind named by its first argument
    ('physical_line' or 'logical_line'); a class is registered as a 'tree'
    (AST) check when its __init__ takes (self, tree, ...).  When codes is
    None, the error codes are scraped from the check's docstring.
    """
    # inspect.getargspec() was removed in Python 3.11; prefer
    # getfullargspec() when available and fall back for Python 2.
    # getfullargspec()[0] is the same positional-argument list.
    getargspec = (getattr(inspect, 'getfullargspec', None) or
                  inspect.getargspec)

    def _add_check(check, kind, codes, args):
        # Re-registering merges the new codes into the existing entry.
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = getargspec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if getargspec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
01333
01334
def init_checks_registry():
    """Register all globally visible functions.

    The first argument name is either 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    for _name, function in inspect.getmembers(module, inspect.isfunction):
        # register_check() itself filters out functions that are not checks.
        register_check(function)
init_checks_registry()
01344
01345
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        # Either a prebuilt options object is supplied, or the kwargs are
        # forwarded to a throw-away StyleGuide that builds the defaults.
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.multiline = False  # in a multiline string?
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Dictionary where a checker can store its custom state.
        self._checker_states = {}
        # Load the source: explicit lines, stdin, or the named file.
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                # Remember the failure; it is reported as E902 when the
                # checks actually run (see generate_tokens).
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:  # already-decoded BOM character
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':  # raw BOM bytes
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                # (lineno, col) is embedded in a longer location tuple.
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer."""
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        # Remember the first indentation character seen in the file
        # (space or tab); used by indentation-consistency checks.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin."""
        # Each declared argument name is looked up as an attribute of
        # this checker, so checks declare the state they need by name.
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def init_checker_state(self, name, argument_names):
        """Prepare a custom state for the specific checker plugin."""
        if 'checker_state' in argument_names:
            self.checker_state = self._checker_states.setdefault(name, {})

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            self.init_checker_state(name, argument_names)
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                if text[:4] == 'E101':
                    # E101 means the line's indent char contradicts
                    # indent_char: adopt this line's character instead.
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens."""
        logical = []
        comments = []
        length = 0
        # mapping records (offset-in-logical-line, (row, col)) pairs so
        # that check offsets can be translated back to file positions.
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                # Mask string contents so checks don't match inside them.
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    if prev_text == ',' or (prev_text not in '{[(' and
                                            text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()

        if not mapping:
            return

        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            self.init_checker_state(name, argument_names)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # Translate a logical-line offset back to a (row, col)
                    # position using the token mapping.
                    for token_offset, pos in mapping:
                        if offset <= token_offset:
                            break
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Honour a "# noqa" marker on the offending line.
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                if token[2][0] > self.total_lines:
                    return
                # NOTE(review): upstream pep8 1.6.2 also sets
                # "self.noqa = token[4] and noqa(token[4])" at this point,
                # before the physical checks run -- confirm that omitting
                # it here is intended.
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token):
        """If appropriate (based on token), check current physical line(s)."""
        # Called after every token, but act only on end of line.
        if _is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it contains
            #   the magical "# noqa" comment, we disable all physical
            #   checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            for line in token[1].split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file."""
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                # Only at bracket depth 0 can a logical line end.
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
                elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
                    if len(self.tokens) == 1:
                        # The comment also ends a physical line; normalize
                        # the token so it looks line-terminated before
                        # checking the logical line.
                        token = list(token)
                        token[1] = text.rstrip('\r\n')
                        token[3] = (token[2][0], token[2][1] + len(token[1]))
                        self.tokens = [tuple(token)]
                        self.check_logical()
        if self.tokens:
            # NOTE(review): upstream pep8 passes the last *token's* physical
            # line (self.tokens[-1][4]) here; self.lines[-1] may differ when
            # the leftover tokens do not come from the file's final line --
            # verify.
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
01610
01611
class BaseReport(object):
    """Collect the results of the checks."""

    # Subclasses set this to emit the filename before its first error.
    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Accumulated results.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        if code not in self.counters:
            # First occurrence: remember the message for the statistics.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        else:
            self.counters[code] += 1
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[key] for key in self.messages
                   if key.startswith(prefix))

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        stats = []
        for key in sorted(self.messages):
            if key.startswith(prefix):
                stats.append('%-7s %s %s' %
                             (self.counters[key], key, self.messages[key]))
        return stats

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            return
        for key in self._benchmark_keys:
            print('%-7d %s per second (%d total)' %
                  (self.counters[key] / self.elapsed, key,
                   self.counters[key]))
01700
01701
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Makes BaseReport.error() print the filename the first time an error
    # is found in each file; no per-error output is produced.
    print_filename = True
01705
01706
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # 'default' and 'pylint' name built-in formats; anything else is
        # used directly as a template.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code:
            # Defer printing so the results can be sorted by position;
            # repeats are kept only when --repeat is in effect.
            first_occurrence = (self.counters[code] == 1)
            if first_occurrence or self._repeat:
                self._deferred_print.append(
                    (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            values = {
                'path': self.filename,
                'row': self.line_offset + line_number,
                'col': offset + 1,
                'code': code,
                'text': text,
            }
            print(self._fmt % values)
            if self._show_source:
                line = ('' if line_number > len(self.lines)
                        else self.lines[line_number - 1])
                print(line.rstrip())
                # Caret marker pointing at the offending column.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())

            # stdout is block buffered when not stdout.isatty().
            # line can be broken where buffer boundary since other processes
            # write to same file.
            # flush() after print() to avoid buffer boundary.
            # Typical buffer size is 8192. line written safely when
            # len(line) < 8192.
            sys.stdout.flush()
        return self.file_errors
01760
01761
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Maps filename -> set of line numbers present in the diff.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines that the diff actually touches.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
01773
01774
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', False)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        if options_dict:
            # Explicit keyword arguments override the parsed options.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    # Pruning dirs in place stops os.walk from descending.
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern that matches filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        # A short prefix (e.g. 'E1') is never ignored when a more specific
        # selected code starts with it.
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not ignored.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
01908
01909
def get_parser(prog='pep8', version=__version__):
    """Build the command-line option parser for the pep8 tool."""
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    # Each entry is (flag strings, add_option keyword arguments); the list
    # order determines the order shown by --help.
    option_specs = [
        (('-v', '--verbose'),
         dict(default=0, action='count',
              help="print status messages, or debug with -vv")),
        (('-q', '--quiet'),
         dict(default=0, action='count',
              help="report only file names, or nothing with -qq")),
        (('-r', '--repeat'),
         dict(default=True, action='store_true',
              help="(obsolete) show all occurrences of the same error")),
        (('--first',),
         dict(action='store_false', dest='repeat',
              help="show first occurrence of each error")),
        (('--exclude',),
         dict(metavar='patterns', default=DEFAULT_EXCLUDE,
              help="exclude files or directories which match these "
                   "comma separated patterns (default: %default)")),
        (('--filename',),
         dict(metavar='patterns', default='*.py',
              help="when parsing directories, only check filenames "
                   "matching these comma separated patterns "
                   "(default: %default)")),
        (('--select',),
         dict(metavar='errors', default='',
              help="select errors and warnings (e.g. E,W6)")),
        (('--ignore',),
         dict(metavar='errors', default='',
              help="skip errors and warnings (e.g. E4,W) "
                   "(default: %s)" % DEFAULT_IGNORE)),
        (('--show-source',),
         dict(action='store_true',
              help="show source code for each error")),
        (('--show-pep8',),
         dict(action='store_true',
              help="show text of PEP 8 for each error "
                   "(implies --first)")),
        (('--statistics',),
         dict(action='store_true',
              help="count errors and warnings")),
        (('--count',),
         dict(action='store_true',
              help="print total number of errors and warnings "
                   "to standard error and set exit code to 1 if "
                   "total is not null")),
        (('--max-line-length',),
         dict(type='int', metavar='n', default=MAX_LINE_LENGTH,
              help="set maximum allowed line length "
                   "(default: %default)")),
        (('--hang-closing',),
         dict(action='store_true',
              help="hang closing bracket instead of matching "
                   "indentation of opening bracket's line")),
        (('--format',),
         dict(metavar='format', default='default',
              help="set the error format [default|pylint|<custom>]")),
        (('--diff',),
         dict(action='store_true',
              help="report only lines changed according to the "
                   "unified diff received on STDIN")),
    ]
    for flags, kwargs in option_specs:
        parser.add_option(*flags, **kwargs)
    group = parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
01969
01970
def read_config(options, args, arglist, parser):
    """Read and parse configurations

    If a config file is specified on the command line with the "--config"
    option, then only it is used for configuration.

    Otherwise, the user configuration (~/.config/pep8) and any local
    configurations in the current directory or above will be merged together
    (in that order) using the read method of ConfigParser.
    """
    config = RawConfigParser()

    cli_conf = options.config

    # Base directory used to resolve relative 'exclude' patterns; updated
    # below when a project-level configuration file is found.
    local_dir = os.curdir

    if cli_conf and os.path.isfile(cli_conf):
        if options.verbose:
            print('cli configuration: %s' % cli_conf)
        config.read(cli_conf)
    else:
        if USER_CONFIG and os.path.isfile(USER_CONFIG):
            if options.verbose:
                print('user configuration: %s' % USER_CONFIG)
            config.read(USER_CONFIG)

        # Walk from the common prefix of the input paths up toward the
        # filesystem root, stopping at the first directory that contains a
        # project configuration file (one of PROJECT_CONFIG).
        parent = tail = args and os.path.abspath(os.path.commonprefix(args))
        while tail:
            if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
                local_dir = parent
                if options.verbose:
                    print('local configuration: in %s' % parent)
                break
            (parent, tail) = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option destination to its optparse type (or, for typeless
        # options, its action) so config values can be coerced correctly.
        option_list = dict([(o.dest, o.type or o.action)
                            for o in parser.option_list])

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if opt.replace('_', '-') not in parser.config_options:
                print(" unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print(" %s = %s" % (opt, config.get(pep8_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
                if normalized_opt == 'exclude':
                    # Exclude patterns are resolved relative to the directory
                    # holding the configuration file that set them.
                    value = normalize_paths(value, local_dir)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    # The testing options never come from configuration files.
    options.doctest = options.testsuite = False
    return options
02038
02039
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args.

    Passing in the ``config_file`` parameter allows other tools, such as flake8
    to specify their own options to be processed in pep8.

    Returns the ``(options, args)`` tuple produced by optparse, after
    configuration files have been merged in via ``read_config``.
    """
    if not parser:
        parser = get_parser()
    # Add the --config option lazily so a parser supplied by a caller (e.g.
    # flake8) is only extended once.
    if not parser.has_option('--config'):
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed. Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    # ensure_value guards against parsers built without the testing options
    # (they exist only when the testsuite directory is present).
    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split comma separated option values into lists; empty strings stay
    # falsy so later code can test them directly.
    options.filename = options.filename and options.filename.split(',')
    options.exclude = normalize_paths(options.exclude)
    options.select = options.select and options.select.split(',')
    options.ignore = options.ignore and options.ignore.split(',')

    if options.diff:
        options.reporter = DiffReport
        stdin = stdin_get_value()
        # NOTE(review): assumes 'args' is non-empty when --diff is used;
        # verify for library callers that pass --diff without input paths.
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
02089
02090
def _main():
    """Parse options and run checks on Python source."""
    import signal

    # Exit quietly instead of raising an error when the output pipe closes.
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass    # SIGPIPE is not available on Windows

    style_guide = StyleGuide(parse_argv=True)
    opts = style_guide.options

    if opts.doctest or opts.testsuite:
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()

    if opts.statistics:
        report.print_statistics()
    if opts.benchmark:
        report.print_benchmark()
    if opts.testsuite and not opts.quiet:
        report.print_results()

    if report.total_errors:
        if opts.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)
02118
if __name__ == '__main__':
    # Run the checker's command-line interface when executed as a script.
    _main()