Go to the documentation of this file.00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 from StringIO import StringIO
00034 
def jsmin(js):
    """Return a minified copy of the JavaScript source text ``js``.

    The text is run through ``JavascriptMinify`` using in-memory streams.

    Args:
        js: JavaScript source as a string.

    Returns:
        The minified source, without the single leading linefeed the
        minifier may emit.

    Raises:
        Whatever ``JavascriptMinify.minify`` raises for malformed input
        (unterminated comment, string literal or regular expression).
    """
    source = StringIO(js)
    sink = StringIO()
    JavascriptMinify().minify(source, sink)
    minified = sink.getvalue()
    # The minifier seeds its state with a linefeed as the "previous"
    # character, which can end up as the first character of the output.
    if minified.startswith('\n'):
        minified = minified[1:]
    return minified
00043 
def isAlphanum(c):
    """Return True if ``c`` can be part of a JavaScript identifier-like token.

    Accepted characters are ASCII letters, digits, underscore, dollar sign,
    backslash, and any character whose code point is above 126.

    Args:
        c: A single character, or ``None`` / empty string for "no character".

    Returns:
        bool: True for an accepted character, False otherwise (including
        for ``None`` and the empty string).
    """
    # Guard first: ordering comparisons against None raise TypeError on
    # Python 3, and ord('') raises on every version.
    if not c:
        return False
    return ('a' <= c <= 'z' or '0' <= c <= '9' or 'A' <= c <= 'Z'
            or c in ('_', '$', '\\') or ord(c) > 126)
00050 
class UnterminatedComment(Exception):
    """Signals that the input ended inside a ``/* ... */`` comment."""
00053 
class UnterminatedStringLiteral(Exception):
    """Signals a quoted string cut short by a linefeed or the end of input."""
00056 
class UnterminatedRegularExpression(Exception):
    """Signals a regex literal cut short by a linefeed or the end of input."""
00059 
class JavascriptMinify(object):
    """Character-level JavaScript minifier working on file-like streams.

    The minifier keeps a two-character window (``theA`` = current character,
    ``theB`` = next character) plus a one-character lookahead, and decides
    for every pair whether to emit, replace or drop a character.  The
    character ``'\\000'`` is used internally as the end-of-input sentinel.

    Use ``minify(instream, outstream)`` as the entry point; it sets up all
    state, so an instance can be reused for several runs.
    """

    # A '/' following one of these characters starts a regular expression
    # literal rather than a division.
    _REGEX_PRECEDERS = ('(', ',', '=', ':', '[', '?', '!', '&', '|')
    # A linefeed is kept when it is followed by one of these characters ...
    _KEEP_NEWLINE_BEFORE = ('{', '[', '(', '+', '-')
    # ... or preceded by one of these.
    _KEEP_NEWLINE_AFTER = ('}', ']', ')', '+', '-', '"', '\'')

    def _outA(self):
        """Write the current character (``theA``) to the output stream."""
        self.outstream.write(self.theA)

    def _outB(self):
        """Write the next character (``theB``) to the output stream."""
        self.outstream.write(self.theB)

    def _get(self):
        """Return the next character from the input stream.

        A pending lookahead character is consumed first.  Characters read
        from the stream are normalized: end of input becomes ``'\\000'``,
        a carriage return becomes a linefeed, and any other control
        character becomes a space.
        """
        c = self.theLookahead
        self.theLookahead = None
        if c is not None:
            # The lookahead was produced by an earlier _get() call and is
            # already normalized.  Returning it untouched keeps a peeked
            # end-of-input sentinel from being rewritten into a space.
            return c
        c = self.instream.read(1)
        if c >= ' ' or c == '\n':
            return c
        if c == '':
            return '\000'
        if c == '\r':
            return '\n'
        return ' '

    def _peek(self):
        """Return the next character without consuming it."""
        self.theLookahead = self._get()
        return self.theLookahead

    def _next(self):
        """Return the next character, skipping over comments.

        A ``//`` comment is consumed up to its terminating linefeed (or the
        end of input), which is returned.  A ``/* */`` comment is consumed
        entirely and a single space is returned in its place.

        Raises:
            UnterminatedComment: if the input ends inside a ``/* */``
                comment.
        """
        c = self._get()
        if c == '/':
            p = self._peek()
            if p == '/':
                c = self._get()
                # Both '\n' and the end-of-input sentinel stop the scan.
                while c > '\n':
                    c = self._get()
                return c
            if p == '*':
                c = self._get()
                while True:
                    c = self._get()
                    if c == '*':
                        if self._peek() == '/':
                            self._get()
                            return ' '
                    if c == '\000':
                        raise UnterminatedComment()

        return c

    def _action(self, action):
        """Advance the two-character window according to ``action``.

        Args:
            action: One of
                1 -- output A, copy B to A, get the next B;
                2 -- copy B to A, get the next B (A is deleted);
                3 -- get the next B (B is deleted).

        A quoted string is copied through verbatim as if it were a single
        character.  A ``/`` is treated as the start of a regular expression
        literal, and copied through verbatim, when the current character is
        one of ``( , = : [ ? ! & |``.

        Raises:
            UnterminatedStringLiteral: if a string hits a linefeed or the
                end of input before its closing quote.
            UnterminatedRegularExpression: if a regular expression hits a
                linefeed or the end of input before its closing ``/``.
        """
        if action <= 1:
            self._outA()

        if action <= 2:
            self.theA = self.theB
            if self.theA == "'" or self.theA == '"':
                # theB still holds the opening quote, used to find the
                # matching closing one.
                while True:
                    self._outA()
                    self.theA = self._get()
                    if self.theA == self.theB:
                        break
                    if self.theA <= '\n':
                        raise UnterminatedStringLiteral()
                    if self.theA == '\\':
                        # Emit the backslash now; the escaped character is
                        # emitted at the top of the next iteration.
                        self._outA()
                        self.theA = self._get()

        if action <= 3:
            self.theB = self._next()
            if self.theB == '/' and self.theA in self._REGEX_PRECEDERS:
                self._outA()
                self._outB()
                while True:
                    self.theA = self._get()
                    if self.theA == '/':
                        break
                    elif self.theA == '\\':
                        self._outA()
                        self.theA = self._get()
                    elif self.theA <= '\n':
                        raise UnterminatedRegularExpression()
                    self._outA()
                self.theB = self._next()

    def _jsmin(self):
        """Copy the input to the output, dropping insignificant characters.

        Comments are removed, control characters are turned into spaces,
        carriage returns into linefeeds, and most spaces and linefeeds are
        removed.
        """
        self.theA = '\n'
        self._action(3)

        while self.theA != '\000':
            if self.theA == ' ':
                if isAlphanum(self.theB):
                    self._action(1)
                else:
                    self._action(2)
            elif self.theA == '\n':
                if self.theB in self._KEEP_NEWLINE_BEFORE:
                    self._action(1)
                elif self.theB == ' ':
                    self._action(3)
                elif isAlphanum(self.theB):
                    self._action(1)
                else:
                    self._action(2)
            else:
                if self.theB == ' ':
                    if isAlphanum(self.theA):
                        self._action(1)
                    else:
                        self._action(3)
                elif self.theB == '\n':
                    if self.theA in self._KEEP_NEWLINE_AFTER:
                        self._action(1)
                    elif isAlphanum(self.theA):
                        self._action(1)
                    else:
                        self._action(3)
                else:
                    self._action(1)

    def minify(self, instream, outstream):
        """Minify JavaScript read from ``instream`` into ``outstream``.

        Args:
            instream: File-like object; read one character at a time with
                ``read(1)`` and closed when minification completes.
            outstream: File-like object receiving the output via ``write``.
                It is left open.

        Raises:
            UnterminatedComment, UnterminatedStringLiteral,
            UnterminatedRegularExpression: on malformed input.  In that
                case ``instream`` is not closed.
        """
        self.instream = instream
        self.outstream = outstream
        self.theA = None
        self.theB = None
        self.theLookahead = None

        self._jsmin()
        self.instream.close()
00212 
if __name__ == '__main__':
    # Command-line filter: minify the JavaScript arriving on standard input
    # and write the result to standard output.
    import sys
    jsm = JavascriptMinify()
    jsm.minify(instream=sys.stdin, outstream=sys.stdout)