xss.py
Go to the documentation of this file.
from cgi import escape
from formatter import AbstractFormatter
from htmlentitydefs import entitydefs
from htmllib import HTMLParser
from urlparse import urlparse
from xml.sax.saxutils import quoteattr
7 
def xssescape(text):
    """Return *text* escaped so it is inert when embedded in HTML.

    Gets rid of ``<``, ``>``, ``&`` and the double quote by replacing them
    with HTML entities and, for good measure, replaces ``:`` with ``&#58;``
    so that escaped text can never be read as a URL scheme such as
    ``javascript:``.
    """
    # The colon replacement runs last; the entities produced by escape()
    # contain no colon, so nothing is escaped twice.
    return escape(text, quote=True).replace(':', '&#58;')
11 
class XssCleaner(HTMLParser):
    """Whitelist-based HTML cleaner.

    Markup is fed through the parser and re-emitted into ``self.result``.
    Only the tags in ``permitted_tags`` (with the attributes listed in
    ``allowed_attributes``) are passed through; everything else is escaped
    with ``xssescape`` so it is displayed as text rather than interpreted.
    Use ``strip()`` as the entry point.
    """

    def __init__(self, fmt=AbstractFormatter):
        """Set up the parser and the whitelists that drive the cleaning.

        ``fmt`` is handed straight to ``HTMLParser.__init__``; every handler
        that would use it is overridden below.
        """
        HTMLParser.__init__(self, fmt)
        self.result = ""
        self.open_tags = []
        # A list of the only tags allowed.  Be careful adding to this.  Adding
        # "script," for example, would not be smart.  'img' is out by default
        # because of the danger of IMG embedded commands, and/or web bugs.
        self.permitted_tags = ['a', 'b', 'blockquote', 'br', 'i',
                               'li', 'ol', 'ul', 'p', 'cite']

        # A list of tags that require no closing tag.
        self.requires_no_close = ['img', 'br']

        # A dictionary showing the only attributes allowed for particular tags.
        # If a tag is not listed here, it is allowed no attributes.  Adding
        # "on" tags, like "onhover," would not be smart.  Also be very careful
        # of "background" and "style."
        self.allowed_attributes = {
            'a': ['href', 'title'],
            'img': ['src', 'alt'],
            'blockquote': ['type'],
        }

        # The only schemes allowed in URLs (for href and src attributes).
        # Adding "javascript" or "vbscript" to this list would not be smart.
        self.allowed_schemes = ['http', 'https', 'ftp']

    def handle_data(self, data):
        """Append plain text to the result, escaped."""
        if data:
            self.result += xssescape(data)

    def handle_charref(self, ref):
        """Pass short, purely numeric character references through.

        Anything else is escaped so it shows up literally.
        """
        if len(ref) < 7 and ref.isdigit():
            self.result += '&#%s;' % ref
        else:
            self.result += xssescape('&#%s' % ref)

    def handle_entityref(self, ref):
        """Pass entity references listed in ``entitydefs`` through; escape the rest."""
        if ref in entitydefs:
            self.result += '&%s;' % ref
        else:
            self.result += xssescape('&%s' % ref)

    def handle_comment(self, comment):
        """Emit a non-empty comment as escaped, visible text."""
        if comment:
            self.result += xssescape("<!--%s-->" % comment)

    def handle_starttag(self, tag, method, attrs):
        """Emit an opening tag if it is whitelisted, otherwise escape it.

        ``attrs`` is a sequence of ``(name, value)`` pairs.  ``method`` is
        not used here (``unknown_starttag`` passes ``None``).  Only
        attributes whitelisted for ``tag`` that have a non-empty value are
        kept, and URL attributes are kept only when
        ``url_is_acceptable`` approves them.
        """
        if tag not in self.permitted_tags:
            self.result += xssescape("<%s>" % tag)
            return

        bt = "<" + tag
        if tag in self.allowed_attributes:
            attrs = dict(attrs)
            self.allowed_attributes_here = \
                [x for x in self.allowed_attributes[tag]
                 if x in attrs and len(attrs[x]) > 0]
            for attribute in self.allowed_attributes_here:
                value = attrs[attribute]
                if attribute in ['href', 'src', 'background']:
                    if self.url_is_acceptable(value):
                        # quoteattr() picks the quoting and escapes the
                        # value, so a quote character inside the URL cannot
                        # terminate the attribute and inject new ones.
                        bt += ' %s=%s' % (attribute, quoteattr(value))
                else:
                    bt += ' %s=%s' % (xssescape(attribute), quoteattr(value))

        # An anchor or image left without any attribute is useless: drop it.
        # Its closing tag is dropped too, because the tag never reaches
        # open_tags.
        if bt == "<a" or bt == "<img":
            return
        if tag in self.requires_no_close:
            bt += "/"
        bt += ">"
        self.result += bt
        # Most recently opened tag first, so strip() closes leftovers in
        # the reverse of the order in which they were opened.
        self.open_tags.insert(0, tag)

    def handle_endtag(self, tag, attrs):
        """Emit a closing tag only when it matches a tag that was emitted.

        Closing tags for non-whitelisted elements are escaped; closing
        tags for whitelisted elements that are not currently open are
        dropped.  The second argument is unused.
        """
        bracketed = "</%s>" % tag
        if tag not in self.permitted_tags:
            self.result += xssescape(bracketed)
        elif tag in self.open_tags:
            self.result += bracketed
            self.open_tags.remove(tag)

    def unknown_starttag(self, tag, attributes):
        """Route start tags unknown to the base parser through the same filter."""
        self.handle_starttag(tag, None, attributes)

    def unknown_endtag(self, tag):
        """Route end tags unknown to the base parser through the same filter."""
        self.handle_endtag(tag, None)

    def url_is_acceptable(self, url):
        """Return True for an absolute URL with a whitelisted scheme.

        Requires all URLs to be "absolute": the scheme must be in
        ``allowed_schemes`` and the network location must contain a dot.
        """
        parsed = urlparse(url)
        return parsed[0] in self.allowed_schemes and '.' in parsed[1]

    def strip(self, rawstring):
        """Returns the argument stripped of potentially harmful HTML or Javascript code"""
        # Start from a clean slate so an instance can be reused: without
        # this, tags left open by an earlier call would be closed again in
        # this call's output.
        self.reset()
        self.result = ""
        self.open_tags = []
        self.feed(rawstring)
        # Flush whatever the parser is still buffering (e.g. a trailing,
        # unterminated construct) so that text is escaped, not lost.
        self.close()
        for endtag in self.open_tags:
            if endtag not in self.requires_no_close:
                self.result += "</%s>" % endtag
        return self.result

    def xtags(self):
        """Returns a printable string informing the user which tags are allowed"""
        # Note: this sorts permitted_tags in place, as it always has.
        self.permitted_tags.sort()
        described = []
        for x in self.permitted_tags:
            tg = "<" + x
            for y in self.allowed_attributes.get(x, ()):
                tg += ' %s=""' % y
            described.append(tg + ">")
        return xssescape(" ".join(described))
def handle_endtag(self, tag, attrs)
Definition: xss.py:80
def strip(self, rawstring)
Definition: xss.py:96
def __init__(self, fmt=AbstractFormatter)
Definition: xss.py:13
def handle_comment(self, comment)
Definition: xss.py:51
def xssescape(text)
Definition: xss.py:8
def handle_entityref(self, ref)
Definition: xss.py:46
def xtags(self)
Definition: xss.py:104
def unknown_endtag(self, tag)
Definition: xss.py:90
def handle_data(self, data)
Definition: xss.py:38
def url_is_acceptable(self, url)
Definition: xss.py:92
def unknown_starttag(self, tag, attributes)
Definition: xss.py:88
def handle_starttag(self, tag, method, attrs)
Definition: xss.py:55
def handle_charref(self, ref)
Definition: xss.py:41


webui
Author(s): Scott Hassan
autogenerated on Mon Jun 10 2019 15:51:24