00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 r"""
00027 Check Python source code formatting, according to PEP 8:
00028 http://www.python.org/dev/peps/pep-0008/
00029
00030 For usage and a list of options, try this:
00031 $ python pep8.py -h
00032
00033 This program and its regression test suite live here:
00034 http://github.com/jcrocholl/pep8
00035
00036 Groups of errors and warnings:
00037 E errors
00038 W warnings
00039 100 indentation
00040 200 whitespace
00041 300 blank lines
00042 400 imports
00043 500 line length
00044 600 deprecation
00045 700 statements
00046 900 syntax error
00047 """
00048 __version__ = '1.4.7a0'
00049
00050 import os
00051 import sys
00052 import re
00053 import time
00054 import inspect
00055 import keyword
00056 import tokenize
00057 from optparse import OptionParser
00058 from fnmatch import fnmatch
00059 try:
00060 from configparser import RawConfigParser
00061 from io import TextIOWrapper
00062 except ImportError:
00063 from ConfigParser import RawConfigParser
00064
# Directory names skipped by default when walking a source tree.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
# Checks that are not reported unless explicitly selected by the user.
DEFAULT_IGNORE = 'E123,E226,E24'
# Per-user configuration file location.
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
# Project-level configuration files, searched upward from the start path.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Templates for reporting offenses (selected with --format).
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# compile() flag requesting an AST instead of executing the code
# (same value as ast.PyCF_ONLY_AST).
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Keywords checked for surrounding whitespace; 'print' is a keyword in
# Python 2 only.  The singletons have their own comparison checks.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
# Operators where surrounding whitespace is optional (E226/E227/E228).
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# Operators that must always be surrounded by whitespace (E225).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
# Token types that carry no checkable content.
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
                         tokenize.INDENT, tokenize.DEDENT])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
# E241/E242: two or more spaces, or a tab, after ',', ';' or ':'.
# (A single space after the separator is correct and must not match.)
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')


# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
00112
00113
00114
00115
00116
00117
00118
def tabs_or_spaces(physical_line, indent_char):
    r"""Flag indentation that mixes tabs and spaces (E101).

    Never mix tabs and spaces.  The most popular way of indenting Python
    is with spaces only; the second-most popular way is with tabs only.
    Code indented with a mixture of tabs and spaces should be converted
    to using spaces exclusively.

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indentation character that disagrees with the
    # file's dominant indent character.
    offsets = [pos for pos, ch in enumerate(leading) if ch != indent_char]
    if offsets:
        return offsets[0], "E101 indentation contains mixed spaces and tabs"
00137
00138
def tabs_obsolete(physical_line):
    r"""Report a tab character in the indentation (W191).

    For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    tab_pos = leading.find('\t')
    if tab_pos != -1:
        return tab_pos, "W191 indentation contains tabs"
00150
00151
def trailing_whitespace(physical_line):
    r"""Report trailing whitespace (W291) and whitespace-only lines (W293).

    Trailing whitespace is superfluous, except when it occurs as part of
    a blank line (a line of nothing but whitespace).  The warning varies
    on whether the line itself is blank, for easier filtering for those
    who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator pieces one class at a time so that only
    # genuine trailing whitespace (space/tab/vtab) is measured.
    line = physical_line.rstrip('\n')     # chr(10), newline
    line = line.rstrip('\r')              # chr(13), carriage return
    line = line.rstrip('\x0c')            # chr(12), form feed, ^L
    stripped = line.rstrip(' \t\v')
    if stripped == line:
        return None
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
00180
00181
def trailing_blank_lines(physical_line, lines, line_number):
    r"""Report a blank line at the end of the file (W391).

    Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    is_last_line = (line_number == len(lines))
    if is_last_line and not physical_line.rstrip():
        return 0, "W391 blank line at end of file"
00191
00192
def missing_newline(physical_line):
    """Report a missing newline on the last line of the file (W292)."""
    # A line that ends in a newline is never equal to its rstripped form,
    # so only the final, unterminated line of a file can trigger this.
    if physical_line == physical_line.rstrip():
        return len(physical_line), "W292 no newline at end of file"
00201
00202
def maximum_line_length(physical_line, max_line_length):
    """Limit all lines to a maximum of ``max_line_length`` characters (E501).

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  For flowing long blocks of text
    (docstrings or comments), limiting the length to 72 characters is
    recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length <= max_line_length or noqa(line):
        return
    if hasattr(line, 'decode'):
        # Python 2 byte strings: count characters, not bytes.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
00228
00229
00230
00231
00232
00233
00234
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level):
    r"""Check blank-line conventions around definitions (E301-E304).

    Separate top-level function and class definitions with two blank
    lines; method definitions inside a class with a single blank line.
    Extra blank lines may be used (sparingly) to separate groups of
    related functions.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # a decorator must be immediately followed by its target
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
        return
    if blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
        return
    if logical_line.startswith(('def ', 'class ', '@')):
        if not indent_level:
            # top-level definition: exactly two blank lines expected
            if blank_lines != 2:
                yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
        elif not (blank_lines or previous_indent_level < indent_level or
                  DOCSTRING_REGEX.match(previous_logical)):
            yield 0, "E301 expected 1 blank line, found 0"
00271
00272
def extraneous_whitespace(logical_line):
    """Avoid extraneous whitespace (E201, E202, E203).

    Avoid whitespace immediately inside parentheses, brackets or braces,
    and immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text.endswith(' '):
            # opening bracket followed by a space
            yield found + 1, "E201 whitespace after '%s'" % char
        elif logical_line[found - 1] != ',':
            # space before a closer or separator (but tolerate ", )" etc.)
            code = 'E202' if char in '}])' else 'E203'
            yield found, "%s whitespace before '%s'" % (code, char)
00304
00305
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords (E271-E274).

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    checks = (
        (1, "E274 tab before keyword", "E272 multiple spaces before keyword"),
        (2, "E273 tab after keyword", "E271 multiple spaces after keyword"),
    )
    for match in KEYWORD_REGEX.finditer(logical_line):
        # group 1 is the whitespace before the keyword, group 2 the
        # whitespace after it; a tab wins over the multiple-space check.
        for group, tab_msg, spaces_msg in checks:
            ws = match.group(group)
            if '\t' in ws:
                yield match.start(group), tab_msg
            elif len(ws) > 1:
                yield match.start(group), spaces_msg
00328
00329
def missing_whitespace(logical_line):
    """Each comma, semicolon or colon should be followed by whitespace (E231).

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, char in enumerate(line[:-1]):
        if char not in ',;:' or line[index + 1] in WHITESPACE:
            continue
        before = line[:index]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            continue  # slice syntax, no space required
        if char == ',' and line[index + 1] == ')':
            continue  # allow tuple with only one element: (3,)
        yield index, "E231 missing whitespace after '%s'" % char
00355
00356
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level (E111, E112, E113).

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_char == ' ' and indent_level % 4:
        yield 0, "E111 indentation is not a multiple of four"
    # A trailing colon on the previous logical line demands a deeper indent.
    expected_deeper = previous_logical.endswith(':')
    if expected_deeper:
        if indent_level <= previous_indent_level:
            yield 0, "E112 expected an indented block"
    elif indent_level > previous_indent_level:
        yield 0, "E113 unexpected indentation"
00382
00383
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          noqa, verbose):
    r"""
    Continuation lines should align wrapped elements either vertically using
    Python's implicit line joining inside parentheses, brackets and braces, or
    using a hanging indent.

    When using a hanging indent the following considerations should be applied:

    - there should be no arguments on the first line, and

    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (a or\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        # the whole logical line fits on one physical line: nothing to check
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # remember how many brackets were opened on each physical row
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # columns at which a continuation line may validly start
    indent_chances = {}
    last_indent = tokens[0][2]
    # indent[depth] is the column of the visual indent at each bracket depth
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # is this token on a new physical row?
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # only treat it as a continuation line if the previous token
            # did not span multiple lines and this is a real token
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            if depth:
                # inside brackets: identify the line the innermost open
                # bracket was opened on
                for open_row in range(row - 1, -1, -1):
                    if parens[open_row]:
                        break
                else:
                    # an unbracketed continuation line (ie, backslash)
                    open_row = 0
                # hanging indent relative to the opening line
                hang = rel_indent[row] - rel_indent[open_row]
                close_bracket = (token_type == tokenize.OP and text in ']})')
                visual_indent = (not close_bracket and hang > 0 and
                                 indent_chances.get(start[1]))

                if close_bracket and indent[depth]:
                    # closing bracket for visual indent
                    if start[1] != indent[depth]:
                        yield (start, "E124 closing bracket does not match "
                               "visual indentation")
                elif close_bracket and not hang:
                    # closing bracket matches indentation of opening line
                    if hang_closing:
                        yield start, "E133 closing bracket is missing indentation"
                elif visual_indent is True:
                    # visual indent is verified
                    if not indent[depth]:
                        # visual indent is broken after this line
                        indent[depth] = start[1]
                elif visual_indent in (text, str):
                    # ignore token lined up with matching one from a
                    # previous line
                    pass
                elif indent[depth] and start[1] < indent[depth]:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
                elif hang == 4 or (indent_next and rel_indent[row] == 8):
                    # hanging indent is verified
                    if close_bracket and not hang_closing:
                        yield (start, "E123 closing bracket does not match "
                               "indentation of opening bracket's line")
                else:
                    # indent is broken
                    if hang <= 0:
                        error = "E122", "missing indentation or outdented"
                    elif indent[depth]:
                        error = "E127", "over-indented for visual indent"
                    elif hang % 4:
                        error = "E121", "indentation is not a multiple of four"
                    else:
                        error = "E126", "over-indented for hanging indent"
                    yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == len("elif")
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # decrement the bracket count of the row it was opened on
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        rel_indent[row] = rel_indent[idx]
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])

    if indent_next and expand_indent(line) == indent_level + 4:
        yield (last_indent, "E125 continuation line does not distinguish "
               "itself from next logical line")
00553
00554
def whitespace_before_parameters(logical_line, tokens):
    """Avoid whitespace before the bracket that opens a call or index (E211).

    Avoid extraneous whitespace immediately before the open parenthesis
    that starts the argument list of a function call, and immediately
    before the open bracket that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index, token in enumerate(tokens[1:], 1):
        token_type, text, start, end, __ = token
        is_offending = (
            token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            # preceded by a name or a closing bracket (i.e. something callable
            # or subscriptable) ...
            (prev_type == tokenize.NAME or prev_text in '}])') and
            # ... that is not a class definition ...
            (index < 2 or tokens[index - 2][1] != 'class') and
            # ... nor a keyword such as 'print' or 'return'
            not keyword.iskeyword(prev_text))
        if is_offending:
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
00587
00588
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator (E221-E224).

    More than one space around an assignment (or other) operator to
    align it with another is discouraged.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    checks = (
        (1, "E223 tab before operator", "E221 multiple spaces before operator"),
        (2, "E224 tab after operator", "E222 multiple spaces after operator"),
    )
    for match in OPERATOR_REGEX.finditer(logical_line):
        # group 1 is the whitespace before the operator, group 2 after;
        # a tab takes precedence over the multiple-space diagnostic.
        for group, tab_msg, spaces_msg in checks:
            ws = match.group(group)
            if '\t' in ws:
                yield match.start(group), tab_msg
            elif len(ws) > 1:
                yield match.start(group), spaces_msg
00614
00615
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # need_space is a tri-state flag:
    #   False           -- no pending operator
    #   True            -- previous token was a mandatory-space operator
    #   (pos, had_space) -- previous token was an optional-space operator;
    #                      pos is where to report, had_space whether a space
    #                      preceded the operator
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3
            continue
        # track call/lambda nesting so that '=' in keyword arguments and
        # default values is not treated as an assignment operator
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # optional-space operator with space after but not before
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # tolerate the ">" operator when it is preceded by a '<' or
                # a '-' (the "<>" and "->" forms)
                pass
            else:
                if need_space is True or need_space[1]:
                    # a needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # optional-space operator with no space on either side:
                    # pick the code by operator category
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # allow keyword args or defaults: foo(bar=None)
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # check for negative number, decorator, or keyword argument:
                # the operator is binary only when the previous token could
                # end an operand
                if prev_type == tokenize.OP:
                    binary_usage = (prev_text in '}])')
                elif prev_type == tokenize.NAME:
                    binary_usage = (prev_text not in KEYWORDS)
                else:
                    binary_usage = (prev_type not in SKIP_TOKENS)

                if binary_usage:
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # surrounding space is optional, but ensure that the
                # trailing space matches the opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # a needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
00711
00712
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma, semicolon or colon.

    Note: these checks (E241, E242) are disabled by default.

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        found = m.start() + 1
        # distinguish tab (E242) from a run of spaces (E241)
        kind = 'E242 tab' if '\t' in m.group() else 'E241 multiple spaces'
        yield found, "%s after '%s'" % (kind, m.group()[0])
00733
00734
def whitespace_around_named_parameter_equals(logical_line, tokens):
    """Don't use spaces around '=' in keyword arguments or defaults (E251).

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    message = "E251 unexpected spaces around keyword / parameter equals"
    paren_depth = 0
    expect_tight = False  # the token after a keyword '=' must abut it
    prev_end = None
    for token_type, text, start, end, line in tokens:
        if expect_tight:
            expect_tight = False
            if start != prev_end:
                yield (prev_end, message)
        elif token_type == tokenize.OP:
            if text == '(':
                paren_depth += 1
            elif text == ')':
                paren_depth -= 1
            elif paren_depth and text == '=':
                # '=' inside parentheses is a keyword/default assignment
                expect_tight = True
                if start != prev_end:
                    yield (prev_end, message)
        prev_end = end
00769
00770
def whitespace_before_inline_comment(logical_line, tokens):
    """Separate inline comments by at least two spaces (E261, E262).

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement.  They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  ## Increment x
    E262: x = x + 1  #Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type != tokenize.COMMENT:
            if token_type != tokenize.NL:
                prev_end = end
            continue
        if not line[:start[1]].strip():
            continue  # comment on its own (possibly indented) line
        if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
            yield (prev_end,
                   "E261 at least two spaces before inline comment")
        symbol, sp, comment = text.partition(' ')
        if symbol not in ('#', '#:') or comment[:1].isspace():
            yield start, "E262 inline comment should start with '# '"
00798
00799
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines (E401).

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    # a ';' before the comma means the comma belongs to a later statement
    if comma > -1 and ';' not in logical_line[:comma]:
        yield comma, "E401 multiple imports on one line"
00818
00819
def compound_statements(logical_line):
    r"""Discourage multiple statements on the same line (E701-E703).

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.  Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    while -1 < found < last_char:
        before = line[:found]
        # a colon inside unclosed brackets (dict/slice/annotation) or a
        # lambda default does not end a compound-statement header
        brackets_closed = (before.count('{') <= before.count('}') and
                           before.count('[') <= before.count(']') and
                           before.count('(') <= before.count(')'))
        if brackets_closed and not LAMBDA_REGEX.search(before):
            yield found, "E701 multiple statements on one line (colon)"
        found = line.find(':', found + 1)
    found = line.find(';')
    while found > -1:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
00864
00865
def explicit_line_join(logical_line, tokens):
    r"""
    Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    prev_start = prev_end = parens = 0
    for token_type, text, start, end, line in tokens:
        # 'backslash' holds the (row, col) of a trailing backslash on the
        # previous physical line, or None; it is only an offense while we
        # are inside brackets (parens > 0).
        if start[0] != prev_start and parens and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # token ends on a new physical line: record whether that line
            # ends with an explicit continuation backslash
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        # track bracket nesting depth
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
00899
00900
def comparison_to_singleton(logical_line, noqa):
    """Compare to singletons with 'is' or 'is not' (E711, E712).

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None
    was set to some other value.  The other value might have a type (such
    as a container) that could be false in a boolean context!
    """
    if noqa:
        return
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    same = (match.group(1) == '==')
    singleton = match.group(2)
    msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # suggest the plain truthiness test for True/False comparisons
        nonzero = ((singleton == 'True' and same) or
                   (singleton == 'False' and not same))
        msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
    yield match.start(1), ("%s comparison to %s should be %s" %
                           (code, singleton, msg))
00929
00930
def comparison_type(logical_line):
    """Use isinstance() instead of comparing types directly (E721).

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might
    be a unicode string too!  In Python 2.3, str and unicode have a
    common base class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    inst = match.group(1)
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return  # both sides are type(identifier): allow the comparison
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
00952
00953
def python_3000_has_key(logical_line):
    r"""The {}.has_key() method is removed in Python 3 (W601).

    Use the 'in' operation instead.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    pos = logical_line.find('.has_key(')
    if pos != -1:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
00965
00966
def python_3000_raise_comma(logical_line):
    """The comma form of raising an exception is removed in Python 3 (W602).

    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".  The
    paren-using form also avoids line-continuation characters when the
    exception arguments are long or include string formatting.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    if RERAISE_COMMA_REGEX.match(logical_line):
        # the three-argument re-raise form has no paren equivalent
        return
    yield match.end() - 1, "W602 deprecated form of raising exception"
00983
00984
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only. New code should always use !=.
    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    index = logical_line.find('<>')
    if index != -1:
        yield index, "W603 '<>' is deprecated, use '!='"
00997
00998
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3.
    Use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    # String contents are muted before this runs, so a backtick inside a
    # string literal cannot trigger a false positive.
    index = logical_line.find('`')
    if index != -1:
        yield index, "W604 backticks are deprecated, use 'repr()'"
01010
01011
01012
01013
01014
01015
01016
# Python 2/3 compatibility shims.  On Python 2, '' == b'' is true, so the
# first branch selects the simpler byte-oriented helpers; on Python 3 the
# comparison is False and the encoding-aware versions are used instead.
if '' == ''.encode():

    def readlines(filename):
        # Python 2: read the file as-is, no decoding needed.
        f = open(filename)
        try:
            return f.readlines()
        finally:
            f.close()
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:

    def readlines(filename):
        # Python 3: honor the encoding declared in the file (PEP 263).
        f = open(filename, 'rb')
        try:
            coding, lines = tokenize.detect_encoding(f.readline)
            f = TextIOWrapper(f, coding, line_buffering=True)
            # The lines consumed by detect_encoding() must be decoded by
            # hand; the TextIOWrapper decodes the remainder of the file.
            return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back to latin-1, which accepts any byte sequence.
            f.close()

            f = open(filename, encoding='latin-1')
            return f.readlines()
        finally:
            f.close()
    isidentifier = str.isidentifier

    def stdin_get_value():
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
readlines.__doc__ = "    Read the source code."
# Matches '# noqa' / '# nopep8' comments, which suppress reporting.
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
01048
01049
def expand_indent(line):
    r"""
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('    \t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    # Fast path: without tabs the indent is just the leading whitespace.
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == '\t':
            # Jump to the next multiple of 8.
            width = (width // 8 + 1) * 8
        elif char == ' ':
            width += 1
        else:
            break
    return width
01077
01078
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # The closing quote character also opens the string; searching for its
    # first occurrence skips any prefix modifiers (e.g. u, b or r).
    quote = text[-1]
    begin = text.index(quote) + 1
    finish = len(text) - 1
    if text[-3:] in ('"""', "'''"):
        # Triple-quoted string: widen the quote span by two on each side.
        begin += 2
        finish -= 2
    return text[:begin] + 'x' * (finish - begin) + text[finish:]
01098
01099
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # Map each modified file of the diff to the set of row numbers touched.
    changed = {}
    path = remaining = None
    for line in diff.splitlines():
        if remaining:
            # Inside a hunk: every line except a removal advances the count.
            if not line.startswith('-'):
                remaining -= 1
            continue
        if line.startswith('@@ '):
            hunk = HUNK_REGEX.match(line)
            row, remaining = [int(group or '1') for group in hunk.groups()]
            changed[path].update(range(row, row + remaining))
        elif line.startswith('+++'):
            path = line[4:].split('\t', 1)[0]
            if path.startswith('b/'):
                path = path[2:]
            changed[path] = set()
    return dict([(os.path.join(parent, path), rows)
                 for (path, rows) in changed.items()
                 if rows and filename_match(path, patterns)])
01123
01124
def filename_match(filename, patterns, default=True):
    """
    Check if patterns contains a pattern that matches filename.
    If patterns is unspecified, this always returns True.
    """
    if patterns:
        return any(fnmatch(filename, pattern) for pattern in patterns)
    return default
01133
01134
01135 ##############################################################################
01136 # Framework to run all checks
01137 ##############################################################################
01138
01139
# Registry of check plugins, keyed by the name of their first argument
# ('physical_line', 'logical_line') or 'tree' for AST-based classes.
# Populated by register_check(), driven by init_checks_registry().
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
01141
01142
def register_check(check, codes=None):
    """
    Register a new check object.

    A check is either a function whose first argument is named
    'physical_line' or 'logical_line', or a class whose constructor
    accepts (self, tree, ...).  The codes it reports are taken from
    the 'codes' argument, or extracted from its docstring.
    """
    def _add_check(check, kind, codes, args):
        # Merge codes when the same check is registered twice.
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    # inspect.getargspec was deprecated since Python 3.0 and removed in
    # Python 3.11; prefer getfullargspec when it exists (its first item
    # is the same list of positional argument names).
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if inspect.isfunction(check):
        args = getargspec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if getargspec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
01161
01162
def init_checks_registry():
    """
    Register all globally visible functions where the first argument name
    is 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    for _name, function in inspect.getmembers(module, inspect.isfunction):
        register_check(function)
init_checks_registry()
01172
01173
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.
    """

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        # Build a default StyleGuide from the keyword arguments when no
        # pre-parsed options object is supplied.
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Load the source lines: from the 'lines' argument, from stdin, or
        # by reading the file (I/O errors are reported later as E902).
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                exc_type, exc = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        # Extract the (row, col) position from the exception when available;
        # default to the top of the file otherwise.
        exc_type, exc = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)
    report_invalid_syntax.__doc__ = "    Check if the syntax is valid."

    def readline(self):
        """
        Get the next line from the input buffer.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.
        """
        # Each check declares the Checker attributes it needs through its
        # positional parameter names (see register_check).
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # Remember the first indentation character seen (space or tab);
        # the indentation checks use it to detect inconsistent files.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        for name, check, argument_names in self._physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.
        """
        # self.mapping maps offsets in the logical line back to their
        # originating tokens, so errors are reported at the right position.
        self.mapping = []
        logical = []
        comments = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                # Mute string contents so they cannot confuse the regexes.
                text = mute_string(text)
            if previous:
                end_row, end = previous[3]
                start_row, start = token[2]
                if end_row != start_row:    # different row
                    prev_text = self.lines[end_row - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    fill = self.lines[end_row - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        # With Python 2, if the line ends with '\r\r\n' the assertion fails
        # assert self.logical_line.strip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        self.build_tokens_line()
        self.report.increment_logical_line()
        # Compute the indentation of the first token of the logical line.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            for result in self.run_check(check, argument_names):
                offset, text = result
                if isinstance(offset, tuple):
                    orig_number, orig_offset = offset
                else:
                    # Map the offset in the logical line back to the
                    # physical position of the originating token.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            orig_number = token[2][0]
                            orig_offset = (token[2][1] + offset - token_offset)
                self.report_error(orig_number, orig_offset, text, check)
        self.previous_logical = self.logical_line

    def check_ast(self):
        """Compile the file into an AST and run all tree checks on it."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, _ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Errors on lines carrying a 'noqa' comment are suppressed.
                if not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, running physical checks along the way."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline_check_physical)
        try:
            for token in tokengen:
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.
        """
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = blank_lines_before_comment = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                # Track bracket depth: a logical line only ends when the
                # NEWLINE token occurs outside any brackets.
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type == tokenize.NEWLINE:
                    if self.blank_lines < blank_lines_before_comment:
                        self.blank_lines = blank_lines_before_comment
                    self.check_logical()
                    self.tokens = []
                    self.blank_lines = blank_lines_before_comment = 0
                elif token_type == tokenize.NL:
                    if len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        self.tokens = []
                elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
                    # Remember blank lines seen before a standalone comment,
                    # so the blank-line checks see through comment blocks.
                    if blank_lines_before_comment < self.blank_lines:
                        blank_lines_before_comment = self.blank_lines
                    self.blank_lines = 0
                    if COMMENT_WITH_NL:
                        # The comment also ends a physical line
                        self.tokens = []
        return self.report.get_file_results()
01409
01410
class BaseReport(object):
    """Collect the results of the checks."""
    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Accumulated results.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        try:
            self.counters[code] += 1
        except KeyError:
            # First occurrence: remember the message for the statistics.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[code]
                   for code in self.messages if code.startswith(prefix))

    def get_statistics(self, prefix=''):
        """
        Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[code], code, self.messages[code])
                for code in sorted(self.messages) if code.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        elapsed = self.elapsed
        print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
        if elapsed:
            for key in self._benchmark_keys:
                print('%-7d %s per second (%d total)' %
                      (self.counters[key] / elapsed, key,
                       self.counters[key]))
01499
01500
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Overrides BaseReport: print each filename once, before its first error.
    print_filename = True
01504
01505
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # 'format' is either one of the predefined names or a template.
        fmt = options.format
        self._fmt = REPORT_FORMAT.get(fmt.lower(), fmt)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are buffered, then printed sorted by position in
        # get_file_results().
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code and (self._repeat or self.counters[code] == 1):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                line = ('' if line_number > len(self.lines)
                        else self.lines[line_number - 1])
                print(line.rstrip())
                print(' ' * offset + '^')
            if self._show_pep8 and doc:
                print(doc.lstrip('\n').rstrip())
        return self.file_errors
01551
01552
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines that appear in the unified diff.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
01564
01565
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        options, self.paths = process_options(
            parse_argv=parse_argv, config_file=config_file, parser=parser)
        if args or kwargs:
            # build options from dict
            options_dict = dict(*args, **kwargs)
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        # Normalize exclude patterns and the select/ignore code tuples.
        for index, value in enumerate(options.exclude):
            options.exclude[index] = value.rstrip('/')
        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        # Resolve the selected checks once, from the global registry.
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Allow a partial run to be interrupted cleanly.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Prune excluded directories in place so os.walk skips them.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """
        Check if options.exclude contains a pattern that matches filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            # Also try matching against the path relative to 'parent'.
            filename = os.path.join(parent, filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """
        Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """
        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not ignored.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
01698
01699
def get_parser(prog='pep8', version=__version__):
    """Build the command line parser, with all supported options."""
    opt_parser = OptionParser(prog=prog, version=version,
                              usage="%prog [options] input ...")
    # Options which may also be provided through a configuration file.
    opt_parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    add = opt_parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', default=True, action='store_true',
        help="(obsolete) show all occurrences of the same error")
    add('--first', action='store_false', dest='repeat',
        help="show first occurrence of each error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %default)")
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns "
             "(default: %default)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error "
             "(implies --first)")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--max-line-length', type='int', metavar='n',
        default=MAX_LINE_LENGTH,
        help="set maximum allowed line length "
             "(default: %default)")
    add('--hang-closing', action='store_true',
        help="hang closing bracket instead of matching "
             "indentation of opening bracket's line")
    add('--format', metavar='format', default='default',
        help="set the error format [default|pylint|<custom>]")
    add('--diff', action='store_true',
        help="report only lines changed according to the "
             "unified diff received on STDIN")
    testing = opt_parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        # The regression testsuite is only available in a source checkout.
        testing.add_option('--testsuite', metavar='dir',
                           help="run regression tests from dir")
        testing.add_option('--doctest', action='store_true',
                           help="run doctest on myself")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return opt_parser
01758
01759
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration."""
    config = RawConfigParser()

    # The user-level configuration file, if any.
    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    # Walk up from the common prefix of the input paths, looking for a
    # project configuration file (setup.cfg, tox.ini or .pep8).  Project
    # settings override the user configuration read above.
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        parent, tail = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option destination to its type (or action), so values
        # read from the configuration can be coerced properly.
        option_list = dict([(o.dest, o.type or o.action)
                            for o in parser.option_list])

        # First, read the default values
        new_options, _ = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if options.verbose > 1:
                print("  %s = %s" % (opt, config.get(pep8_section, opt)))
            if opt.replace('_', '-') not in parser.config_options:
                print("Unknown option: '%s'\n  not in [%s]" %
                      (opt, ' '.join(parser.config_options)))
                sys.exit(1)
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        options, _ = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
01809
01810
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args."""
    if not arglist and not parse_argv:
        # Don't read the command line if the module is used as a library.
        arglist = []
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        # config_file is True when the caller wants the default location.
        if config_file is True:
            config_file = DEFAULT_CONFIG
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    options, args = parser.parse_args(arglist)
    options.reporter = None

    # Resolve the input paths, then merge in the configuration files.
    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split the comma-separated option values into lists.
    options.filename = options.filename and options.filename.split(',')
    options.exclude = options.exclude.split(',')
    options.select = options.select and options.select.split(',')
    options.ignore = options.ignore and options.ignore.split(',')

    if options.diff:
        # Only check the lines touched by the unified diff read from stdin.
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
01856
01857
def _main():
    """Parse options and run checks on Python source."""
    style_guide = StyleGuide(parse_argv=True, config_file=True)
    opts = style_guide.options
    if opts.doctest or opts.testsuite:
        # The regression tests live in the optional 'testsuite' package.
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()
    if opts.statistics:
        report.print_statistics()
    if opts.benchmark:
        report.print_benchmark()
    if opts.testsuite and not opts.quiet:
        report.print_results()
    if report.total_errors:
        if opts.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)


if __name__ == '__main__':
    _main()