00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 import string
00042 import re
00043 import os
00044 import urllib
00045 import urlparse
00046 from types   import StringTypes, TupleType
00047 from xml.dom import EMPTY_PREFIX, EMPTY_NAMESPACE
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 _reWhitespace  = re.compile('\s')
00058 _reWhitespaces = re.compile('\s+')
00059 
00060 _reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") 
00061 
00062 
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
def removeWhitespaces (strValue):
    """Delete all whitespace from the given string.

    Input parameter:
        'strValue':  string to be processed
    Returns 'strValue' with every run of whitespace characters removed.
    """
    withoutWhitespace = _reWhitespaces.sub('', strValue)
    return withoutWhitespace
00073 
00074 
00075 
00076 
00077 
def collapseString (strValue, lstrip=1, rstrip=1):
    """Collapse whitespace runs to single blanks and optionally strip the ends.

    Input parameter:
        'strValue':  string to be processed
        'lstrip':    if true, leading whitespace is stripped from the result
        'rstrip':    if true, trailing whitespace is stripped from the result
    Returns the collapsed (and possibly stripped) string.
    """
    collapsed = _reWhitespaces.sub(' ', strValue)
    # Stripping the left and the right end one after the other is the same
    # as a single strip() when both flags are set.
    if lstrip:
        collapsed = collapsed.lstrip()
    if rstrip:
        collapsed = collapsed.rstrip()
    return collapsed
00088         
00089 
00090 
00091 
00092 
00093 
def normalizeString (strValue):
    """Replace every single whitespace character by a blank.

    Input parameter:
        'strValue':  string to be processed
    Returns a string of the same length in which each whitespace character
    has been substituted by ' ' (runs are not collapsed).
    """
    normalized = _reWhitespace.sub(' ', strValue)
    return normalized
00096 
00097 
00098 
00099 
00100 
def processWhitespaceAction (strValue, wsAction, lstrip=1, rstrip=1):
    """Apply the whitespace handling selected by 'wsAction' to a string.

    Input parameter:
        'strValue':  string to be processed
        'wsAction':  "collapse" -> collapseString() is applied,
                     "replace"  -> normalizeString() is applied,
                     any other value leaves the string untouched
        'lstrip':    forwarded to collapseString() (only used for "collapse")
        'rstrip':    forwarded to collapseString() (only used for "collapse")
    Returns the processed string.
    """
    if wsAction == "replace":
        return normalizeString(strValue)
    if wsAction == "collapse":
        return collapseString(strValue, lstrip, rstrip)
    # Unknown or "preserve"-like action: hand the value back unchanged.
    return strValue
00108     
00109 
00110 
00111 
00112 
def convertToUrl (fileOrUrl):
    """Convert a file name or URL string into a URL string.

    Input parameter:
        'fileOrUrl':  file path or URL
    Returns
        - for 'file:...' input: "file:" followed by the remainder with every
          ':' replaced by '|',
        - for 'http:', 'ftp:' or 'gopher:' input: the input unchanged,
        - for the path of an existing file: urllib.pathname2url() of that path,
        - for anything else: the input unchanged.
    """
    schemeMatch = _reSplitUrlApplication.match(fileOrUrl)
    if schemeMatch:
        if schemeMatch.group(1) != 'file':
            # already a (non-file) URL
            return fileOrUrl
        # file URL: substitute ':' by '|' in the part after the scheme
        return "file:" + re.sub(':', '|', schemeMatch.group(2))

    if os.path.isfile(fileOrUrl):
        # plain path of an existing file
        return urllib.pathname2url (fileOrUrl)

    # neither a recognised URL nor an existing file
    return fileOrUrl
00131 
00132 
00133 
00134 
00135 
def convertToAbsUrl (fileOrUrl, baseUrl):
    """Convert a file name or URL string into an absolute URL string.

    Input parameter:
        'fileOrUrl':  file path or URL (may be empty)
        'baseUrl':    base used when 'fileOrUrl' is empty or relative
    Returns
        - for empty 'fileOrUrl' and non-empty 'baseUrl': a "file:" URL built
          from <cwd>/<baseUrl>/__NO_FILE__,
        - for the path of an existing file: a "file:" URL built from
          <cwd>/<fileOrUrl>,
        - for 'file:...' input: "file:" followed by the remainder with every
          ':' replaced by '|',
        - for 'http:', 'ftp:' or 'gopher:' input: the input unchanged,
        - otherwise: urlparse.urljoin(baseUrl, fileOrUrl) if 'baseUrl' is
          non-empty, else the input unchanged.
    """
    if fileOrUrl == "" and baseUrl != "":
        placeholderPath = os.path.join(os.getcwd(), baseUrl, "__NO_FILE__")
        return "file:" + urllib.pathname2url (placeholderPath)

    if os.path.isfile(fileOrUrl):
        localPath = os.path.join(os.getcwd(), fileOrUrl)
        return "file:" + urllib.pathname2url (localPath)

    schemeMatch = _reSplitUrlApplication.match(fileOrUrl)
    if schemeMatch:
        if schemeMatch.group(1) == 'file':
            # file URL: substitute ':' by '|' in the part after the scheme
            return "file:" + re.sub(':', '|', schemeMatch.group(2))
        # already a (non-file) URL
        return fileOrUrl

    # no recognised scheme: resolve against the base URL if there is one
    if baseUrl != "":
        return urlparse.urljoin (baseUrl, fileOrUrl)
    return fileOrUrl
00158 
00159 
00160 
00161 
def normalizeFilter (filterVar):
    """Bring a filter argument into tuple form.

    Input parameter:
        'filterVar':  None, '*', a single filter value or a tuple of values
    Returns
        - ("*",) if 'filterVar' is None or '*',
        - 'filterVar' itself if it already is a tuple,
        - a one-element tuple containing 'filterVar' otherwise.
    """
    # Identity test for None; the builtin 'tuple' is the same object as the
    # Python 2 alias types.TupleType.
    if filterVar is None or filterVar == '*':
        return ("*",)
    if not isinstance(filterVar, tuple):
        return (filterVar,)
    return filterVar
00168 
00169 
00170 
00171 
00172 
00173 
def nsNameToQName (nsLocalName, curNs):
    """Convert a tuple '(namespace, localName)' to a string 'prefix:localName'

    Input parameter:
        nsLocalName:   tuple '(namespace, localName)' to be converted
        curNs:         sequence of '(prefix, namespace)' pairs currently in scope
    Returns the corresponding string 'prefix:localName' for 'nsLocalName'.
    The first pair whose namespace equals the searched one wins; if its prefix
    is None only the local name is returned. If no pair matches and the
    namespace is None, the local name is returned as is.
    Raises LookupError if no prefix is registered for a non-None namespace.
    """
    ns = nsLocalName[0]
    for prefix, namespace in curNs:
        if ns == namespace:
            if prefix is not None:
                return "%s:%s" %(prefix, nsLocalName[1])
            # default namespace: no prefix to prepend
            return "%s" %nsLocalName[1]

    # No matching declaration found (the loop never breaks, so a for/else
    # clause is not needed here).
    if ns is None:
        return nsLocalName[1]
    raise LookupError("Prefix for namespaceURI '%s' not found!" % (ns,))
00194 
00195 
def splitQName (qName):
    """Split the given 'qName' into prefix/namespace and local name.

    Input parameter:
        'qName':  contains a string 'prefix:localName' or '{namespace}localName'
    Returns a tuple (prefixOrNamespace, localName); the first element is None
    if 'qName' contains neither a '}' nor a ':'.
    """
    # Clark notation is checked first because a namespace URI usually
    # contains ':' itself.
    namespaceEndIndex = qName.find('}')
    if namespaceEndIndex != -1:
        # skip the leading '{' and cut at the closing '}'
        return qName[1:namespaceEndIndex], qName[namespaceEndIndex+1:]

    prefixEndIndex = qName.find(':')
    if prefixEndIndex != -1:
        return qName[:prefixEndIndex], qName[prefixEndIndex+1:]

    return None, qName
00216 
00217 
def toClarkQName (tupleOrLocalName):
    """converts a tuple (namespace, localName) into clark notation {namespace}localName
       qNames without namespace remain unchanged

    Input parameter:
        'tupleOrLocalName':  tuple '(namespace, localName)' to be converted;
                             any non-tuple value is returned unchanged
    Returns a string {namespace}localName, or only the local name if the
    namespace equals EMPTY_NAMESPACE.
    """
    # The builtin 'tuple' is the same object as the Python 2 alias
    # types.TupleType, so tuple subclasses are accepted as before.
    if not isinstance(tupleOrLocalName, tuple):
        return tupleOrLocalName
    if tupleOrLocalName[0] != EMPTY_NAMESPACE:
        return "{%s}%s" %(tupleOrLocalName[0], tupleOrLocalName[1])
    return tupleOrLocalName[1]
00233     
00234     
def splitClarkQName (qName):
    """converts clark notation {namespace}localName into a tuple (namespace, localName)

    Input parameter:
        'qName':  {namespace}localName to be converted
    Returns a tuple (namespace, localName); namespace is None if 'qName'
    contains no '}'.
    """
    namespaceEndIndex = qName.find('}')
    if namespaceEndIndex == -1:
        # no namespace part at all
        return None, qName
    # skip the leading '{' and cut at the closing '}'
    return qName[1:namespaceEndIndex], qName[namespaceEndIndex+1:]
00250     
00251     
00252 
00253 
00254 
00255 
00256 
00257 _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
00258 
00259 _escapeDict = {
00260     "&": "&",
00261     "<": "<",
00262     ">": ">",
00263     '"': """,
00264 }
00265 
00266 
00267 def _raiseSerializationError(text):
00268     raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__))
00269 
00270 
00271 def _encode(text, encoding):
00272     try:
00273         return text.encode(encoding)
00274     except AttributeError:
00275         return text 
00276 
00277 
def _encodeEntity(text, pattern=_escape):
    """Replace all characters matched by 'pattern' with entity references.

    Input parameter:
        'text':     string to be escaped
        'pattern':  compiled regular expression selecting the character runs
                    that have to be replaced (default: _escape)
    Returns the escaped text passed through _encode(..., "ascii").
    Characters listed in _escapeDict are replaced by their named entity, all
    other matched characters by a numeric character reference '&#<ord>;'.
    A TypeError raised during substitution is reported through
    _raiseSerializationError().
    """
    def escapeEntities(m, entityMap=_escapeDict):
        # Called by pattern.sub() for every matched run of characters.
        out = []
        for char in m.group():
            replacement = entityMap.get(char)
            if replacement is None:
                replacement = "&#%d;" % ord(char)
            out.append(replacement)
        return "".join(out)
    try:
        return _encode(pattern.sub(escapeEntities, text), "ascii")
    except TypeError:
        # e.g. 'text' is not a string at all
        _raiseSerializationError(text)
00293 
00294 
def escapeCdata(text, encoding=None, replace=None):
    """Escape character data for use as XML element content.

    Input parameter:
        'text':      string to be escaped
        'encoding':  optional target encoding; if given, 'text' is first passed
                     through _encode(); if that raises UnicodeError the result
                     of _encodeEntity(text) is returned instead
        'replace':   optional callable replace(text, old, new) used for the
                     substitutions; by default the 'replace' method of the
                     text itself is used
    Returns the text with '&', '<' and '>' replaced by '&amp;', '&lt;' and
    '&gt;'.
    A TypeError or AttributeError raised while processing is reported through
    _raiseSerializationError().
    """
    if replace is None:
        # Same effect as the former default string.replace(s, old, new).
        replace = lambda value, old, new: value.replace(old, new)
    try:
        if encoding:
            try:
                text = _encode(text, encoding)
            except UnicodeError:
                return _encodeEntity(text)
        # '&' has to be handled first, otherwise the ampersands of the
        # entities inserted below would be escaped a second time.
        text = replace(text, "&", "&amp;")
        text = replace(text, "<", "&lt;")
        text = replace(text, ">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raiseSerializationError(text)
00309 
00310 
def escapeAttribute(text, encoding=None, replace=None):
    """Escape a string for use as XML attribute value.

    Input parameter:
        'text':      string to be escaped
        'encoding':  optional target encoding; if given, 'text' is first passed
                     through _encode(); if that raises UnicodeError the result
                     of _encodeEntity(text) is returned instead
        'replace':   optional callable replace(text, old, new) used for the
                     substitutions; by default the 'replace' method of the
                     text itself is used
    Returns the text with '&', "'", '"', '<' and '>' replaced by '&amp;',
    '&apos;', '&quot;', '&lt;' and '&gt;'.
    A TypeError or AttributeError raised while processing is reported through
    _raiseSerializationError().
    """
    if replace is None:
        # Same effect as the former default string.replace(s, old, new).
        replace = lambda value, old, new: value.replace(old, new)
    try:
        if encoding:
            try:
                text = _encode(text, encoding)
            except UnicodeError:
                return _encodeEntity(text)
        # '&' has to be handled first, otherwise the ampersands of the
        # entities inserted below would be escaped a second time.
        text = replace(text, "&", "&amp;")
        text = replace(text, "'", "&apos;")
        text = replace(text, "\"", "&quot;")
        text = replace(text, "<", "&lt;")
        text = replace(text, ">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raiseSerializationError(text)
00327 
00328 
00329 
00330 
00331 
00332 
00333 
00334 
00335 
class QNameTuple(tuple):
    """Tuple (prefix, localName) which is printed as 'prefix:localName'."""

    def __str__(self):
        """Return 'prefix:localName', or only the local name if the prefix
        equals EMPTY_PREFIX."""
        prefix, localName = self[0], self[1]
        if prefix != EMPTY_PREFIX:
            return "%s:%s" % (prefix, localName)
        return localName
00342     
00343 
def QNameTupleFactory(initValue):
    """Create a QNameTuple from a string or from a sequence.

    Input parameter:
        'initValue':  string 'prefix:localName' or 'localName', or any value
                      accepted by the tuple constructor
    Returns a QNameTuple. A string is split at its first ':'; a string
    without ':' gets EMPTY_PREFIX as prefix. Non-string values are passed to
    QNameTuple unchanged.
    """
    if isinstance(initValue, StringTypes):
        separatorIndex = initValue.find(':')
        if separatorIndex != -1:
            initValue = (initValue[:separatorIndex], initValue[separatorIndex+1:])
        else:
            initValue = (EMPTY_PREFIX, initValue)
    return QNameTuple(initValue)
00352 
00353 
00354 
00355 
00356 
class NsNameTuple(tuple):
    """Tuple (namespace, localName) which is printed in clark notation."""

    def __str__(self):
        """Return '{namespace}localName', only the local name if the namespace
        equals EMPTY_NAMESPACE, or "None" if the local name is None as well."""
        namespace = self[0]
        if namespace != EMPTY_NAMESPACE:
            return "{%s}%s" % (namespace, self[1])
        localName = self[1]
        if localName != None:
            return localName
        return "None"
00365 
00366 
def NsNameTupleFactory(initValue):
    """Create a NsNameTuple from a string, from None or from a sequence.

    Input parameter:
        'initValue':  string in clark notation '{namespace}localName' (or a
                      plain local name), None, or any value accepted by the
                      tuple constructor
    Returns a NsNameTuple. A string is split by splitClarkQName(); None
    becomes (EMPTY_NAMESPACE, None). Other values are passed to NsNameTuple
    unchanged.
    """
    if isinstance(initValue, StringTypes):
        initValue = splitClarkQName(initValue)
    elif initValue is None:
        initValue = (EMPTY_NAMESPACE, initValue)
    return NsNameTuple(initValue)
00373 
00374