00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 r"""
00027 Check Python source code formatting, according to PEP 8:
00028 http://www.python.org/dev/peps/pep-0008/
00029
00030 For usage and a list of options, try this:
00031 $ python pep8.py -h
00032
00033 This program and its regression test suite live here:
00034 http://github.com/jcrocholl/pep8
00035
00036 Groups of errors and warnings:
00037 E errors
00038 W warnings
00039 100 indentation
00040 200 whitespace
00041 300 blank lines
00042 400 imports
00043 500 line length
00044 600 deprecation
00045 700 statements
00046 900 syntax error
00047 """
00048 __version__ = '1.4.7a0'
00049
00050 import os
00051 import sys
00052 import re
00053 import time
00054 import inspect
00055 import keyword
00056 import tokenize
00057 from optparse import OptionParser
00058 from fnmatch import fnmatch
00059 try:
00060 from configparser import RawConfigParser
00061 from io import TextIOWrapper
00062 except ImportError:
00063 from ConfigParser import RawConfigParser
00064
# Directory names skipped by default when walking a source tree.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
# Checks that are not reported unless explicitly selected by the user.
DEFAULT_IGNORE = 'E123,E226,E24'
# Per-user configuration file location.
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
# Project-level configuration files, searched upward from the start path.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Templates for reporting offenses (selected with --format).
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# compile() flag requesting an AST instead of executing the code
# (same value as ast.PyCF_ONLY_AST).
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Keywords checked for surrounding whitespace; 'print' is a keyword in
# Python 2 only.  The singletons have their own comparison checks.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
# Operators where surrounding whitespace is optional (E226/E227/E228).
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# Operators that must always be surrounded by whitespace (E225).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
# Token types that carry no checkable content.
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
                         tokenize.INDENT, tokenize.DEDENT])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
# E241/E242: two or more spaces, or a tab, after ',', ';' or ':'.
# (A single space after the separator is correct and must not match.)
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')


# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
00112
00113
00114
00115
00116
00117
00118
def tabs_or_spaces(physical_line, indent_char):
    r"""Flag indentation that mixes tabs and spaces (E101).

    Never mix tabs and spaces.  The most popular way of indenting Python
    is with spaces only; the second-most popular way is with tabs only.
    Code indented with a mixture of tabs and spaces should be converted
    to using spaces exclusively.

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indentation character that disagrees with the
    # file's dominant indent character.
    offsets = [pos for pos, ch in enumerate(leading) if ch != indent_char]
    if offsets:
        return offsets[0], "E101 indentation contains mixed spaces and tabs"
00137
00138
def tabs_obsolete(physical_line):
    r"""Report a tab character in the indentation (W191).

    For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    tab_pos = leading.find('\t')
    if tab_pos != -1:
        return tab_pos, "W191 indentation contains tabs"
00150
00151
def trailing_whitespace(physical_line):
    r"""Report trailing whitespace (W291) and whitespace-only lines (W293).

    Trailing whitespace is superfluous, except when it occurs as part of
    a blank line (a line of nothing but whitespace).  The warning varies
    on whether the line itself is blank, for easier filtering for those
    who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator pieces one class at a time so that only
    # genuine trailing whitespace (space/tab/vtab) is measured.
    line = physical_line.rstrip('\n')     # chr(10), newline
    line = line.rstrip('\r')              # chr(13), carriage return
    line = line.rstrip('\x0c')            # chr(12), form feed, ^L
    stripped = line.rstrip(' \t\v')
    if stripped == line:
        return None
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
00180
00181
def trailing_blank_lines(physical_line, lines, line_number):
    r"""Report a blank line at the end of the file (W391).

    Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    is_last_line = (line_number == len(lines))
    if is_last_line and not physical_line.rstrip():
        return 0, "W391 blank line at end of file"
00191
00192
def missing_newline(physical_line):
    """Report a missing newline on the last line of the file (W292)."""
    # A line that ends in a newline is never equal to its rstripped form,
    # so only the final, unterminated line of a file can trigger this.
    if physical_line == physical_line.rstrip():
        return len(physical_line), "W292 no newline at end of file"
00201
00202
def maximum_line_length(physical_line, max_line_length):
    """Limit all lines to a maximum of ``max_line_length`` characters (E501).

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  For flowing long blocks of text
    (docstrings or comments), limiting the length to 72 characters is
    recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length <= max_line_length or noqa(line):
        return
    if hasattr(line, 'decode'):
        # Python 2 byte strings: count characters, not bytes.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
00228
00229
00230
00231
00232
00233
00234
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level):
    r"""Check blank-line conventions around definitions (E301-E304).

    Separate top-level function and class definitions with two blank
    lines; method definitions inside a class with a single blank line.
    Extra blank lines may be used (sparingly) to separate groups of
    related functions.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # a decorator must be immediately followed by its target
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
        return
    if blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
        return
    if logical_line.startswith(('def ', 'class ', '@')):
        if not indent_level:
            # top-level definition: exactly two blank lines expected
            if blank_lines != 2:
                yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
        elif not (blank_lines or previous_indent_level < indent_level or
                  DOCSTRING_REGEX.match(previous_logical)):
            yield 0, "E301 expected 1 blank line, found 0"
00271
00272
def extraneous_whitespace(logical_line):
    """Avoid extraneous whitespace (E201, E202, E203).

    Avoid whitespace immediately inside parentheses, brackets or braces,
    and immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text.endswith(' '):
            # opening bracket followed by a space
            yield found + 1, "E201 whitespace after '%s'" % char
        elif logical_line[found - 1] != ',':
            # space before a closer or separator (but tolerate ", )" etc.)
            code = 'E202' if char in '}])' else 'E203'
            yield found, "%s whitespace before '%s'" % (code, char)
00304
00305
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords (E271-E274).

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    checks = (
        (1, "E274 tab before keyword", "E272 multiple spaces before keyword"),
        (2, "E273 tab after keyword", "E271 multiple spaces after keyword"),
    )
    for match in KEYWORD_REGEX.finditer(logical_line):
        # group 1 is the whitespace before the keyword, group 2 the
        # whitespace after it; a tab wins over the multiple-space check.
        for group, tab_msg, spaces_msg in checks:
            ws = match.group(group)
            if '\t' in ws:
                yield match.start(group), tab_msg
            elif len(ws) > 1:
                yield match.start(group), spaces_msg
00328
00329
def missing_whitespace(logical_line):
    """Each comma, semicolon or colon should be followed by whitespace (E231).

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, char in enumerate(line[:-1]):
        if char not in ',;:' or line[index + 1] in WHITESPACE:
            continue
        before = line[:index]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            continue  # slice syntax, no space required
        if char == ',' and line[index + 1] == ')':
            continue  # allow tuple with only one element: (3,)
        yield index, "E231 missing whitespace after '%s'" % char
00355
00356
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level (E111, E112, E113).

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_char == ' ' and indent_level % 4:
        yield 0, "E111 indentation is not a multiple of four"
    # A trailing colon on the previous logical line demands a deeper indent.
    expected_deeper = previous_logical.endswith(':')
    if expected_deeper:
        if indent_level <= previous_indent_level:
            yield 0, "E112 expected an indented block"
    elif indent_level > previous_indent_level:
        yield 0, "E113 unexpected indentation"
00382
00383
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          noqa, verbose):
    r"""
    Continuation lines should align wrapped elements either vertically using
    Python's implicit line joining inside parentheses, brackets and braces, or
    using a hanging indent.

    When using a hanging indent the following considerations should be applied:

    - there should be no arguments on the first line, and

    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (a or\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        # the whole logical line fits on one physical line: nothing to check
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # remember how many brackets were opened on each physical row
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # columns at which a continuation line may validly start
    indent_chances = {}
    last_indent = tokens[0][2]
    # indent[depth] is the column of the visual indent at each bracket depth
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # is this token on a new physical row?
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # only treat it as a continuation line if the previous token
            # did not span multiple lines and this is a real token
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            if depth:
                # inside brackets: identify the line the innermost open
                # bracket was opened on
                for open_row in range(row - 1, -1, -1):
                    if parens[open_row]:
                        break
                else:
                    # an unbracketed continuation line (ie, backslash)
                    open_row = 0
                # hanging indent relative to the opening line
                hang = rel_indent[row] - rel_indent[open_row]
                close_bracket = (token_type == tokenize.OP and text in ']})')
                visual_indent = (not close_bracket and hang > 0 and
                                 indent_chances.get(start[1]))

                if close_bracket and indent[depth]:
                    # closing bracket for visual indent
                    if start[1] != indent[depth]:
                        yield (start, "E124 closing bracket does not match "
                               "visual indentation")
                elif close_bracket and not hang:
                    # closing bracket matches indentation of opening line
                    if hang_closing:
                        yield start, "E133 closing bracket is missing indentation"
                elif visual_indent is True:
                    # visual indent is verified
                    if not indent[depth]:
                        # visual indent is broken after this line
                        indent[depth] = start[1]
                elif visual_indent in (text, str):
                    # ignore token lined up with matching one from a
                    # previous line
                    pass
                elif indent[depth] and start[1] < indent[depth]:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
                elif hang == 4 or (indent_next and rel_indent[row] == 8):
                    # hanging indent is verified
                    if close_bracket and not hang_closing:
                        yield (start, "E123 closing bracket does not match "
                               "indentation of opening bracket's line")
                else:
                    # indent is broken
                    if hang <= 0:
                        error = "E122", "missing indentation or outdented"
                    elif indent[depth]:
                        error = "E127", "over-indented for visual indent"
                    elif hang % 4:
                        error = "E121", "indentation is not a multiple of four"
                    else:
                        error = "E126", "over-indented for hanging indent"
                    yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == len("elif")
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # decrement the bracket count of the row it was opened on
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        rel_indent[row] = rel_indent[idx]
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])

    if indent_next and expand_indent(line) == indent_level + 4:
        yield (last_indent, "E125 continuation line does not distinguish "
               "itself from next logical line")
00553
00554
def whitespace_before_parameters(logical_line, tokens):
    """Avoid whitespace before the bracket that opens a call or index (E211).

    Avoid extraneous whitespace immediately before the open parenthesis
    that starts the argument list of a function call, and immediately
    before the open bracket that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index, token in enumerate(tokens[1:], 1):
        token_type, text, start, end, __ = token
        is_offending = (
            token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            # preceded by a name or a closing bracket (i.e. something callable
            # or subscriptable) ...
            (prev_type == tokenize.NAME or prev_text in '}])') and
            # ... that is not a class definition ...
            (index < 2 or tokens[index - 2][1] != 'class') and
            # ... nor a keyword such as 'print' or 'return'
            not keyword.iskeyword(prev_text))
        if is_offending:
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
00587
00588
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator (E221-E224).

    More than one space around an assignment (or other) operator to
    align it with another is discouraged.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    checks = (
        (1, "E223 tab before operator", "E221 multiple spaces before operator"),
        (2, "E224 tab after operator", "E222 multiple spaces after operator"),
    )
    for match in OPERATOR_REGEX.finditer(logical_line):
        # group 1 is the whitespace before the operator, group 2 after;
        # a tab takes precedence over the multiple-space diagnostic.
        for group, tab_msg, spaces_msg in checks:
            ws = match.group(group)
            if '\t' in ws:
                yield match.start(group), tab_msg
            elif len(ws) > 1:
                yield match.start(group), spaces_msg
00614
00615
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # need_space is a tri-state flag:
    #   False           -- no pending operator
    #   True            -- previous token was a mandatory-space operator
    #   (pos, had_space) -- previous token was an optional-space operator;
    #                      pos is where to report, had_space whether a space
    #                      preceded the operator
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3
            continue
        # track call/lambda nesting so that '=' in keyword arguments and
        # default values is not treated as an assignment operator
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # optional-space operator with space after but not before
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # tolerate the ">" operator when it is preceded by a '<' or
                # a '-' (the "<>" and "->" forms)
                pass
            else:
                if need_space is True or need_space[1]:
                    # a needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # optional-space operator with no space on either side:
                    # pick the code by operator category
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # allow keyword args or defaults: foo(bar=None)
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # check for negative number, decorator, or keyword argument:
                # the operator is binary only when the previous token could
                # end an operand
                if prev_type == tokenize.OP:
                    binary_usage = (prev_text in '}])')
                elif prev_type == tokenize.NAME:
                    binary_usage = (prev_text not in KEYWORDS)
                else:
                    binary_usage = (prev_type not in SKIP_TOKENS)

                if binary_usage:
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # surrounding space is optional, but ensure that the
                # trailing space matches the opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # a needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
00711
00712
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma, semicolon or colon.

    Note: these checks (E241, E242) are disabled by default.

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        found = m.start() + 1
        # distinguish tab (E242) from a run of spaces (E241)
        kind = 'E242 tab' if '\t' in m.group() else 'E241 multiple spaces'
        yield found, "%s after '%s'" % (kind, m.group()[0])
00733
00734
def whitespace_around_named_parameter_equals(logical_line, tokens):
    """Don't use spaces around '=' in keyword arguments or defaults (E251).

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    message = "E251 unexpected spaces around keyword / parameter equals"
    paren_depth = 0
    expect_tight = False  # the token after a keyword '=' must abut it
    prev_end = None
    for token_type, text, start, end, line in tokens:
        if expect_tight:
            expect_tight = False
            if start != prev_end:
                yield (prev_end, message)
        elif token_type == tokenize.OP:
            if text == '(':
                paren_depth += 1
            elif text == ')':
                paren_depth -= 1
            elif paren_depth and text == '=':
                # '=' inside parentheses is a keyword/default assignment
                expect_tight = True
                if start != prev_end:
                    yield (prev_end, message)
        prev_end = end
00769
00770
def whitespace_before_inline_comment(logical_line, tokens):
    """Separate inline comments by at least two spaces (E261, E262).

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement.  They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  ## Increment x
    E262: x = x + 1  #Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type != tokenize.COMMENT:
            if token_type != tokenize.NL:
                prev_end = end
            continue
        if not line[:start[1]].strip():
            continue  # comment on its own (possibly indented) line
        if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
            yield (prev_end,
                   "E261 at least two spaces before inline comment")
        symbol, sp, comment = text.partition(' ')
        if symbol not in ('#', '#:') or comment[:1].isspace():
            yield start, "E262 inline comment should start with '# '"
00798
00799
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines (E401).

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    # a ';' before the comma means the comma belongs to a later statement
    if comma > -1 and ';' not in logical_line[:comma]:
        yield comma, "E401 multiple imports on one line"
00818
00819
def compound_statements(logical_line):
    r"""Discourage multiple statements on the same line (E701-E703).

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.  Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    while -1 < found < last_char:
        before = line[:found]
        # a colon inside unclosed brackets (dict/slice/annotation) or a
        # lambda default does not end a compound-statement header
        brackets_closed = (before.count('{') <= before.count('}') and
                           before.count('[') <= before.count(']') and
                           before.count('(') <= before.count(')'))
        if brackets_closed and not LAMBDA_REGEX.search(before):
            yield found, "E701 multiple statements on one line (colon)"
        found = line.find(':', found + 1)
    found = line.find(';')
    while found > -1:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
00864
00865
def explicit_line_join(logical_line, tokens):
    r"""
    Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    prev_start = prev_end = parens = 0
    for token_type, text, start, end, line in tokens:
        # 'backslash' holds the (row, col) of a trailing backslash on the
        # previous physical line, or None; it is only an offense while we
        # are inside brackets (parens > 0).
        if start[0] != prev_start and parens and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # token ends on a new physical line: record whether that line
            # ends with an explicit continuation backslash
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        # track bracket nesting depth
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
00899
00900
def comparison_to_singleton(logical_line, noqa):
    """Compare to singletons with 'is' or 'is not' (E711, E712).

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None
    was set to some other value.  The other value might have a type (such
    as a container) that could be false in a boolean context!
    """
    if noqa:
        return
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    same = (match.group(1) == '==')
    singleton = match.group(2)
    msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # suggest the plain truthiness test for True/False comparisons
        nonzero = ((singleton == 'True' and same) or
                   (singleton == 'False' and not same))
        msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
    yield match.start(1), ("%s comparison to %s should be %s" %
                           (code, singleton, msg))
00929
00930
def comparison_type(logical_line):
    """Use isinstance() instead of comparing types directly (E721).

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might
    be a unicode string too!  In Python 2.3, str and unicode have a
    common base class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    inst = match.group(1)
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return  # both sides are type(identifier): allow the comparison
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
00952
00953
def python_3000_has_key(logical_line):
    r"""The {}.has_key() method is removed in Python 3 (W601).

    Use the 'in' operation instead.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    pos = logical_line.find('.has_key(')
    if pos != -1:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
00965
00966
def python_3000_raise_comma(logical_line):
    """The comma form of raising an exception is removed in Python 3 (W602).

    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".  The
    paren-using form also avoids line-continuation characters when the
    exception arguments are long or include string formatting.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    if RERAISE_COMMA_REGEX.match(logical_line):
        # the three-argument re-raise form has no paren equivalent
        return
    yield match.end() - 1, "W602 deprecated form of raising exception"
00983
00984
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only. New code should always use !=.
    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    index = logical_line.find('<>')
    if index != -1:
        yield index, "W603 '<>' is deprecated, use '!='"
00997
00998
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3.
    Use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    # String contents are muted before this runs, so a backtick inside a
    # string literal cannot trigger a false positive.
    index = logical_line.find('`')
    if index != -1:
        yield index, "W604 backticks are deprecated, use 'repr()'"
01010
01011
01012
01013
01014
01015
01016
# Python 2/3 compatibility shims.  On Python 2, '' == b'' is true, so the
# first branch selects the simpler byte-oriented helpers; on Python 3 the
# comparison is False and the encoding-aware versions are used instead.
if '' == ''.encode():

    def readlines(filename):
        # Python 2: read the file as-is, no decoding needed.
        f = open(filename)
        try:
            return f.readlines()
        finally:
            f.close()
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:

    def readlines(filename):
        # Python 3: honor the encoding declared in the file (PEP 263).
        f = open(filename, 'rb')
        try:
            coding, lines = tokenize.detect_encoding(f.readline)
            f = TextIOWrapper(f, coding, line_buffering=True)
            # The lines consumed by detect_encoding() must be decoded by
            # hand; the TextIOWrapper decodes the remainder of the file.
            return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back to latin-1, which accepts any byte sequence.
            f.close()

            f = open(filename, encoding='latin-1')
            return f.readlines()
        finally:
            f.close()
    isidentifier = str.isidentifier

    def stdin_get_value():
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
readlines.__doc__ = "    Read the source code."
# Matches '# noqa' / '# nopep8' comments, which suppress reporting.
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
01048
01049
def expand_indent(line):
    r"""
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('    \t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    # Fast path: without tabs the indent is just the leading whitespace.
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == '\t':
            # Jump to the next multiple of 8.
            width = (width // 8 + 1) * 8
        elif char == ' ':
            width += 1
        else:
            break
    return width
01077
01078
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # The closing quote character also opens the string; searching for its
    # first occurrence skips any prefix modifiers (e.g. u, b or r).
    quote = text[-1]
    begin = text.index(quote) + 1
    finish = len(text) - 1
    if text[-3:] in ('"""', "'''"):
        # Triple-quoted string: widen the quote span by two on each side.
        begin += 2
        finish -= 2
    return text[:begin] + 'x' * (finish - begin) + text[finish:]
01098
01099
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # Map each modified file of the diff to the set of row numbers touched.
    changed = {}
    path = remaining = None
    for line in diff.splitlines():
        if remaining:
            # Inside a hunk: every line except a removal advances the count.
            if not line.startswith('-'):
                remaining -= 1
            continue
        if line.startswith('@@ '):
            hunk = HUNK_REGEX.match(line)
            row, remaining = [int(group or '1') for group in hunk.groups()]
            changed[path].update(range(row, row + remaining))
        elif line.startswith('+++'):
            path = line[4:].split('\t', 1)[0]
            if path.startswith('b/'):
                path = path[2:]
            changed[path] = set()
    return dict([(os.path.join(parent, path), rows)
                 for (path, rows) in changed.items()
                 if rows and filename_match(path, patterns)])
01123
01124
def filename_match(filename, patterns, default=True):
    """
    Check if patterns contains a pattern that matches filename.
    If patterns is unspecified, this always returns True.
    """
    if patterns:
        return any(fnmatch(filename, pattern) for pattern in patterns)
    return default
01133
01134
01135 ##############################################################################
01136 # Framework to run all checks
01137 ##############################################################################
01138
01139
# Registry of check plugins, keyed by the name of their first argument
# ('physical_line', 'logical_line') or 'tree' for AST-based classes.
# Populated by register_check(), driven by init_checks_registry().
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
01141
01142
def register_check(check, codes=None):
    """
    Register a new check object.

    A check is either a function whose first argument is named
    'physical_line' or 'logical_line', or a class whose constructor
    accepts (self, tree, ...).  The codes it reports are taken from
    the 'codes' argument, or extracted from its docstring.
    """
    def _add_check(check, kind, codes, args):
        # Merge codes when the same check is registered twice.
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    # inspect.getargspec was deprecated since Python 3.0 and removed in
    # Python 3.11; prefer getfullargspec when it exists (its first item
    # is the same list of positional argument names).
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if inspect.isfunction(check):
        args = getargspec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if getargspec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
01161
01162
def init_checks_registry():
    """
    Register all globally visible functions where the first argument name
    is 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    for _name, function in inspect.getmembers(module, inspect.isfunction):
        register_check(function)
init_checks_registry()
01172
01173
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.
    """

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        # Build a default StyleGuide from the keyword arguments when no
        # pre-parsed options object is supplied.
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Load the source lines: from the 'lines' argument, from stdin, or
        # by reading the file (I/O errors are reported later as E902).
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                exc_type, exc = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        # Extract the (row, col) position from the exception when available;
        # default to the top of the file otherwise.
        exc_type, exc = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)
    report_invalid_syntax.__doc__ = "    Check if the syntax is valid."

    def readline(self):
        """
        Get the next line from the input buffer.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.
        """
        # Each check declares the Checker attributes it needs through its
        # positional parameter names (see register_check).
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # Remember the first indentation character seen (space or tab);
        # the indentation checks use it to detect inconsistent files.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        for name, check, argument_names in self._physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.
        """
        # self.mapping maps offsets in the logical line back to their
        # originating tokens, so errors are reported at the right position.
        self.mapping = []
        logical = []
        comments = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                # Mute string contents so they cannot confuse the regexes.
                text = mute_string(text)
            if previous:
                end_row, end = previous[3]
                start_row, start = token[2]
                if end_row != start_row:    # different row
                    prev_text = self.lines[end_row - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    fill = self.lines[end_row - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        # With Python 2, if the line ends with '\r\r\n' the assertion fails
        # assert self.logical_line.strip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        self.build_tokens_line()
        self.report.increment_logical_line()
        # Compute the indentation of the first token of the logical line.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            for result in self.run_check(check, argument_names):
                offset, text = result
                if isinstance(offset, tuple):
                    orig_number, orig_offset = offset
                else:
                    # Map the offset in the logical line back to the
                    # physical position of the originating token.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            orig_number = token[2][0]
                            orig_offset = (token[2][1] + offset - token_offset)
                self.report_error(orig_number, orig_offset, text, check)
        self.previous_logical = self.logical_line

    def check_ast(self):
        """Compile the file into an AST and run all tree checks on it."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, _ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Errors on lines carrying a 'noqa' comment are suppressed.
                if not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, running physical checks along the way."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline_check_physical)
        try:
            for token in tokengen:
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.
        """
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = blank_lines_before_comment = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                # Track bracket depth: a logical line only ends when the
                # NEWLINE token occurs outside any brackets.
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type == tokenize.NEWLINE:
                    if self.blank_lines < blank_lines_before_comment:
                        self.blank_lines = blank_lines_before_comment
                    self.check_logical()
                    self.tokens = []
                    self.blank_lines = blank_lines_before_comment = 0
                elif token_type == tokenize.NL:
                    if len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        self.tokens = []
                elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
                    # Remember blank lines seen before a standalone comment,
                    # so the blank-line checks see through comment blocks.
                    if blank_lines_before_comment < self.blank_lines:
                        blank_lines_before_comment = self.blank_lines
                    self.blank_lines = 0
                    if COMMENT_WITH_NL:
                        # The comment also ends a physical line
                        self.tokens = []
        return self.report.get_file_results()
01409
01410
class BaseReport(object):
    """Collect the results of the checks."""
    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Accumulated results.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        try:
            self.counters[code] += 1
        except KeyError:
            # First occurrence: remember the message for the statistics.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[code]
                   for code in self.messages if code.startswith(prefix))

    def get_statistics(self, prefix=''):
        """
        Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[code], code, self.messages[code])
                for code in sorted(self.messages) if code.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        elapsed = self.elapsed
        print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
        if elapsed:
            for key in self._benchmark_keys:
                print('%-7d %s per second (%d total)' %
                      (self.counters[key] / elapsed, key,
                       self.counters[key]))
01499
01500
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Overrides BaseReport: print each filename once, before its first error.
    print_filename = True
01504
01505
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # 'format' is either one of the predefined names or a template.
        fmt = options.format
        self._fmt = REPORT_FORMAT.get(fmt.lower(), fmt)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are buffered, then printed sorted by position in
        # get_file_results().
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code and (self._repeat or self.counters[code] == 1):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                line = ('' if line_number > len(self.lines)
                        else self.lines[line_number - 1])
                print(line.rstrip())
                print(' ' * offset + '^')
            if self._show_pep8 and doc:
                print(doc.lstrip('\n').rstrip())
        return self.file_errors
01551
01552
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines that appear in the unified diff.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
01564
01565
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        options, self.paths = process_options(
            parse_argv=parse_argv, config_file=config_file, parser=parser)
        if args or kwargs:
            # build options from dict
            options_dict = dict(*args, **kwargs)
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        # Normalize exclude patterns and the select/ignore code tuples.
        for index, value in enumerate(options.exclude):
            options.exclude[index] = value.rstrip('/')
        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        # Resolve the selected checks once, from the global registry.
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Allow a partial run to be interrupted cleanly.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Prune excluded directories in place so os.walk skips them.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """
        Check if options.exclude contains a pattern that matches filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            # Also try matching against the path relative to 'parent'.
            filename = os.path.join(parent, filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """
        Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """
        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not ignored.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
01698
01699
def get_parser(prog='pep8', version=__version__):
    """Build the command line parser, with all supported options."""
    opt_parser = OptionParser(prog=prog, version=version,
                              usage="%prog [options] input ...")
    # Options which may also be provided through a configuration file.
    opt_parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    add = opt_parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', default=True, action='store_true',
        help="(obsolete) show all occurrences of the same error")
    add('--first', action='store_false', dest='repeat',
        help="show first occurrence of each error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %default)")
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns "
             "(default: %default)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error "
             "(implies --first)")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--max-line-length', type='int', metavar='n',
        default=MAX_LINE_LENGTH,
        help="set maximum allowed line length "
             "(default: %default)")
    add('--hang-closing', action='store_true',
        help="hang closing bracket instead of matching "
             "indentation of opening bracket's line")
    add('--format', metavar='format', default='default',
        help="set the error format [default|pylint|<custom>]")
    add('--diff', action='store_true',
        help="report only lines changed according to the "
             "unified diff received on STDIN")
    testing = opt_parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        # The regression testsuite is only available in a source checkout.
        testing.add_option('--testsuite', metavar='dir',
                           help="run regression tests from dir")
        testing.add_option('--doctest', action='store_true',
                           help="run doctest on myself")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return opt_parser
01758
01759
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration."""
    config = RawConfigParser()

    # The user-level configuration file, if any.
    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    # Walk up from the common prefix of the input paths, looking for a
    # project configuration file (setup.cfg, tox.ini or .pep8).  Project
    # settings override the user configuration read above.
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        parent, tail = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option destination to its type (or action), so values
        # read from the configuration can be coerced properly.
        option_list = dict([(o.dest, o.type or o.action)
                            for o in parser.option_list])

        # First, read the default values
        new_options, _ = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if options.verbose > 1:
                print("  %s = %s" % (opt, config.get(pep8_section, opt)))
            if opt.replace('_', '-') not in parser.config_options:
                print("Unknown option: '%s'\n  not in [%s]" %
                      (opt, ' '.join(parser.config_options)))
                sys.exit(1)
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        options, _ = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
01809
01810
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args."""
    if not arglist and not parse_argv:
        # Don't read the command line if the module is used as a library.
        arglist = []
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        # config_file is True when the caller wants the default location.
        if config_file is True:
            config_file = DEFAULT_CONFIG
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    options, args = parser.parse_args(arglist)
    options.reporter = None

    # Resolve the input paths, then merge in the configuration files.
    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split the comma-separated option values into lists.
    options.filename = options.filename and options.filename.split(',')
    options.exclude = options.exclude.split(',')
    options.select = options.select and options.select.split(',')
    options.ignore = options.ignore and options.ignore.split(',')

    if options.diff:
        # Only check the lines touched by the unified diff read from stdin.
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
01856
01857
def _main():
    """Parse options and run checks on Python source."""
    style_guide = StyleGuide(parse_argv=True, config_file=True)
    opts = style_guide.options
    if opts.doctest or opts.testsuite:
        # The regression tests live in the optional 'testsuite' package.
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()
    if opts.statistics:
        report.print_statistics()
    if opts.benchmark:
        report.print_benchmark()
    if opts.testsuite and not opts.quiet:
        report.print_results()
    if report.total_errors:
        if opts.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)


if __name__ == '__main__':
    _main()