00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 import sys
00044 import string
00045 import urllib
00046 from xml.dom import EMPTY_NAMESPACE, XMLNS_NAMESPACE
00047 from xml.parsers.expat import ExpatError
00048
00049 if sys.version_info[:2] >= (2,5):
00050 from xml.etree.ElementTree import ElementTree, _ElementInterface, XMLTreeBuilder, TreeBuilder
00051 from xml.etree import ElementInclude
00052 else:
00053 from elementtree.ElementTree import ElementTree, _ElementInterface, XMLTreeBuilder, TreeBuilder
00054 from elementtree import ElementInclude
00055 from ..genxmlif import XMLIF_ELEMENTTREE, GenXmlIfError
00056 from xmlifUtils import convertToAbsUrl, processWhitespaceAction, collapseString, toClarkQName, splitQName
00057 from xmlifBase import XmlIfBuilderExtensionBase
00058 from xmlifApi import XmlInterfaceBase
00059
00060
00061
00062
class XmlInterfaceElementTree (XmlInterfaceBase):
    """XML interface implementation that builds its trees with ElementTree.

    The tree and element wrapper classes (self.treeWrapperClass,
    self.elementWrapperClass) as well as self.verbose, self.useCaching and
    self.processXInclude are not set in this class; they are expected to be
    provided by XmlInterfaceBase.
    """

    def __init__ (self, verbose, useCaching, processXInclude):
        """Initialise the base interface and mark it as the ElementTree flavour.

        All three arguments are passed unchanged to XmlInterfaceBase.__init__.
        """
        XmlInterfaceBase.__init__ (self, verbose, useCaching, processXInclude)
        self.xmlIfType = XMLIF_ELEMENTTREE
        if self.verbose:
            # The parenthesised form prints the same text whether print is a
            # statement (Python 2) or a function.
            print ("Using elementtree interface module...")


    def createXmlTree (self, namespace, xmlRootTagName, attributeDict=None, publicId=None, systemId=None):
        """Create a new tree consisting of a single root element.

        xmlRootTagName is converted with toClarkQName() and used as root tag;
        attributeDict (optional) becomes the attribute dictionary of the root
        element.  namespace, publicId and systemId are accepted but not used
        by this implementation.

        Returns the tree wrapper created around the new tree.
        """
        # A literal {} default would be created once and handed to every root
        # element built without explicit attributes, so all of them would
        # share (and mutate) the same dictionary.
        if attributeDict is None:
            attributeDict = {}
        rootNode = ElementExtension(toClarkQName(xmlRootTagName), attributeDict)
        rootNode.xmlIfExtSetParentNode(None)
        treeWrapper = self.treeWrapperClass(self, ElementTreeExtension(rootNode), self.useCaching)
        # The wrapper object itself is not needed here; it is still created
        # because the constructor presumably attaches the wrapper to rootNode
        # -- TODO confirm against elementWrapperClass.
        self.elementWrapperClass (rootNode, treeWrapper, [])
        return treeWrapper


    def parse (self, file, baseUrl="", ownerDoc=None):
        """Parse the XML document addressed by file and return its tree wrapper.

        file is resolved against baseUrl with convertToAbsUrl() and opened
        with urllib.urlopen(); errors raised while opening propagate unchanged.
        If self.processXInclude is set, XInclude directives are expanded
        after parsing (included XML documents are read with this method).

        Raises GenXmlIfError if the parser reports an ExpatError, if XInclude
        processing raises ElementInclude.FatalIncludeError, or if an IOError
        occurs during XInclude processing.
        """
        absUrl = convertToAbsUrl (file, baseUrl)
        fp = urllib.urlopen (absUrl)
        # sys.exc_info() is used instead of binding the exception in the
        # except clause because the binding syntax differs between Python
        # versions; this spelling is valid in all of them.
        try:
            try:
                tree = ElementTreeExtension()
                treeWrapper = self.treeWrapperClass(self, tree, self.useCaching)
                parser = ExtXMLTreeBuilder(file, absUrl, self, treeWrapper)
                treeWrapper.getTree().parse(fp, parser)
            finally:
                # close the handle on every exit path, not only on the
                # explicitly handled exception types
                fp.close()

            # XInclude support
            if self.processXInclude:
                loaderInst = ExtXIncludeLoader (self.parse, absUrl, ownerDoc)
                try:
                    ElementInclude.include(treeWrapper.getTree().getroot(), loaderInst.loader)
                except IOError:
                    raise GenXmlIfError ("%s: IOError: %s" % (file, str(sys.exc_info()[1])))

        except ExpatError:
            raise GenXmlIfError ("%s: ExpatError: %s" % (file, str(sys.exc_info()[1])))
        except ElementInclude.FatalIncludeError:
            raise GenXmlIfError ("%s: XIncludeError: %s" % (file, str(sys.exc_info()[1])))

        return treeWrapper


    def parseString (self, text, baseUrl="", ownerDoc=None):
        """Parse XML given as string and return its tree wrapper.

        baseUrl is resolved with convertToAbsUrl("", baseUrl) and serves as
        base for XInclude processing, which is performed if
        self.processXInclude is set.

        Unlike parse(), no exception is translated into GenXmlIfError here;
        parser and XInclude errors propagate unchanged.
        """
        absUrl = convertToAbsUrl ("", baseUrl)
        tree = ElementTreeExtension()
        treeWrapper = self.treeWrapperClass(self, tree, self.useCaching)
        parser = ExtXMLTreeBuilder("", absUrl, self, treeWrapper)
        parser.feed(text)
        # parser.close() returns the root element built by the tree builder
        treeWrapper.getTree()._setroot(parser.close())

        # XInclude support
        if self.processXInclude:
            loaderInst = ExtXIncludeLoader (self.parse, absUrl, ownerDoc)
            ElementInclude.include(treeWrapper.getTree().getroot(), loaderInst.loader)

        return treeWrapper
00125
00126
00127
00128
00129
class ElementTreeExtension (ElementTree):
    """ElementTree subclass providing the tree-level xmlIfExt* methods."""

    def xmlIfExtGetRootNode (self):
        """Return the root element of this tree, as reported by getroot()."""
        rootElement = self.getroot()
        return rootElement


    def xmlIfExtCreateElement (self, nsName, attributeDict, curNs):
        """Return a new ElementExtension that is not attached to any tree.

        nsName is converted with toClarkQName() and used as tag;
        attributeDict is passed on as attribute dictionary.  curNs is
        accepted but not used by this implementation.
        """
        return ElementExtension (toClarkQName(nsName), attributeDict)


    def xmlIfExtCloneTree (self, rootElementCopy):
        """Return a new tree object of the same class with rootElementCopy as root."""
        treeClass = self.__class__
        return treeClass(element=rootElementCopy)
00143
00144
00145
00146
00147
class ElementExtension (_ElementInterface):
    """Element class providing the node-level xmlIfExt* methods.

    In addition to the ElementTree element data, every node stores a
    reference to its parent element, which plain ElementTree elements do
    not have.  Tags and attribute names are kept in Clark notation
    ("{namespace}localName"); conversion of caller-supplied names is
    delegated to toClarkQName().
    """

    def __init__ (self, xmlRootTagName, attributeDict):
        """Create an element with the given tag and attribute dictionary."""
        _ElementInterface.__init__(self, xmlRootTagName, attributeDict)
        # A node has no parent until it is attached somewhere.  Initialising
        # the link keeps xmlIfExtGetParentNode() from raising AttributeError
        # on nodes that were never attached (e.g. fresh clones).
        self.__xmlIfExtParentElement = None


    def xmlIfExtUnlink (self):
        """Drop the references to the element wrapper and to the parent element."""
        self.xmlIfExtElementWrapper = None
        self.__xmlIfExtParentElement = None


    def xmlIfExtCloneNode (self):
        """Return a copy of this node without children and without parent.

        Tag, text and tail are taken over; the attribute dictionary is
        copied so that the clone can be modified independently.
        """
        nodeCopy = self.__class__(self.tag, self.attrib.copy())
        nodeCopy.text = self.text
        nodeCopy.tail = self.tail
        return nodeCopy


    def xmlIfExtGetTagName (self):
        """Return the tag of this element exactly as stored."""
        return self.tag


    def xmlIfExtGetNamespaceURI (self):
        """Return the first component that splitQName() yields for the tag.

        Presumably this is the namespace URI of the Clark-notation tag --
        TODO confirm against splitQName.
        """
        prefix, localName = splitQName(self.tag)
        return prefix


    def xmlIfExtGetParentNode (self):
        """Return the stored parent element, or None if there is none."""
        return self.__xmlIfExtParentElement


    def xmlIfExtSetParentNode (self, parentElement):
        """Store parentElement as parent of this node (None for no parent)."""
        self.__xmlIfExtParentElement = parentElement


    def xmlIfExtGetChildren (self, filterTag=None):
        """Return the direct children as a list.

        Without filterTag all children are returned; otherwise only those
        found by findall() for the Clark-notation form of filterTag.
        """
        if filterTag is None:
            # list(self) replaces the deprecated getchildren() and always
            # yields a new list, so callers cannot modify the child sequence
            # of this element by accident.
            return list(self)
        return self.findall(toClarkQName(filterTag))


    def xmlIfExtGetFirstChild (self, filterTag=None):
        """Return the first (matching) direct child, or None if there is none."""
        if filterTag is None:
            if len(self) > 0:
                return self[0]
            return None
        return self.find(toClarkQName(filterTag))


    def xmlIfExtGetElementsByTagName (self, filterTag=(None,None)):
        """Return a list of all descendants matching filterTag.

        The element itself is not part of the result: the search is started
        separately at each direct child.
        """
        clarkFilterTag = toClarkQName(filterTag)
        descendants = []
        for node in self:
            descendants.extend(self._xmlIfExtSubtreeList(node, clarkFilterTag))
        return descendants


    def xmlIfExtGetIterator (self, filterTag=(None,None)):
        """Return a list of this element and its descendants matching filterTag."""
        return self._xmlIfExtSubtreeList(self, toClarkQName(filterTag))


    def _xmlIfExtSubtreeList (self, node, clarkFilterTag):
        """Return node and its descendants matching clarkFilterTag as a list."""
        iterate = getattr(node, "iter", None)
        if iterate is None:
            # older ElementTree versions only provide getiterator(); newer
            # ones deprecate it in favour of iter()
            iterate = node.getiterator
        return list(iterate(clarkFilterTag))


    def xmlIfExtAppendChild (self, childElement):
        """Append childElement as last child and set its parent to this node."""
        self.append (childElement)
        childElement.xmlIfExtSetParentNode(self)


    def xmlIfExtInsertBefore (self, childElement, refChildElement):
        """Insert childElement directly before the existing child refChildElement.

        Raises ValueError if refChildElement is not a child of this node.
        """
        self.insert (list(self).index(refChildElement), childElement)
        childElement.xmlIfExtSetParentNode(self)


    def xmlIfExtRemoveChild (self, childElement):
        """Remove childElement from the children of this node.

        The parent reference stored in childElement is left untouched.
        """
        self.remove (childElement)


    def xmlIfExtInsertSubtree (self, refChildElement, subTree, insertSubTreeRootNode):
        """Insert the nodes of subTree as children of this node.

        The nodes are inserted before refChildElement, or at the beginning
        of the child list if refChildElement is None.  If
        insertSubTreeRootNode is true the root node of subTree is inserted,
        otherwise the children of that root node (keeping their order).
        """
        if refChildElement is not None:
            insertIndex = list(self).index (refChildElement)
        else:
            insertIndex = 0
        subTreeRoot = subTree.xmlIfExtGetRootNode()
        if insertSubTreeRootNode:
            elementList = [subTreeRoot]
        else:
            # Copy before reversing: the returned list may be the child list
            # of the source element itself, and reversing that in place would
            # reorder the source subtree.
            elementList = list(subTreeRoot.xmlIfExtGetChildren())
        # Every node is inserted at the same index, so they have to be
        # processed back to front to end up in their original order.
        elementList.reverse()
        for element in elementList:
            self.insert (insertIndex, element)
            element.xmlIfExtSetParentNode(self)


    def xmlIfExtGetAttributeDict (self):
        """Return the attributes as dictionary keyed by (namespace, localName).

        Attribute names without a '}' are keyed with EMPTY_NAMESPACE as
        namespace.
        """
        attrDict = {}
        for attrName, attrValue in self.attrib.items():
            namespaceEndIndex = attrName.find('}')
            if namespaceEndIndex != -1:
                # "{namespace}localName": skip the leading '{'
                key = (attrName[1:namespaceEndIndex], attrName[namespaceEndIndex+1:])
            else:
                key = (EMPTY_NAMESPACE, attrName)
            attrDict[key] = attrValue
        return attrDict


    def xmlIfExtGetAttribute (self, tupleOrAttrName):
        """Return the value of the given attribute, or None if it is not set."""
        return self.attrib.get(toClarkQName(tupleOrAttrName))


    def xmlIfExtSetAttribute (self, tupleOrAttrName, attributeValue, curNs):
        """Set the given attribute to attributeValue (curNs is not used)."""
        self.attrib[toClarkQName(tupleOrAttrName)] = attributeValue


    def xmlIfExtRemoveAttribute (self, tupleOrAttrName):
        """Remove the given attribute; a missing attribute is ignored."""
        clarkQName = toClarkQName(tupleOrAttrName)
        if clarkQName in self.attrib:
            del self.attrib[clarkQName]


    def xmlIfExtGetElementValueFragments (self, ignoreEmtpyStringFragments):
        """Return the text fragments of this element as a list of strings.

        The fragments are the text of the element followed by the tail of
        each direct child, in document order.  If ignoreEmtpyStringFragments
        is true, fragments for which collapseString() yields "" are dropped.
        The result is never empty: without any fragment [""] is returned.
        """
        elementValueList = []
        if self.text is not None:
            elementValueList.append(self.text)
        for child in self:
            if child.tail is not None:
                elementValueList.append(child.tail)
        if ignoreEmtpyStringFragments:
            # a real list is required for the emptiness check below
            elementValueList = [s for s in elementValueList if collapseString(s) != ""]
        if not elementValueList:
            elementValueList = [""]
        return elementValueList


    def xmlIfExtGetElementText (self):
        """Return the text of this element ("" if there is none)."""
        if self.text is not None:
            return self.text
        return ""


    def xmlIfExtGetElementTailText (self):
        """Return the tail text of this element ("" if there is none)."""
        if self.tail is not None:
            return self.tail
        return ""


    def xmlIfExtSetElementValue (self, elementValue):
        """Set the text of this element and clear the tail of every direct child."""
        self.text = elementValue
        for child in self:
            child.tail = None


    def xmlIfExtProcessWsElementValue (self, wsAction):
        """Apply processWhitespaceAction() to every text fragment of this element.

        The fragments are the element text and the tails of the direct
        children.  The left-strip flag is set only for the first fragment
        and the right-strip flag only for the last one.
        """
        # number of fragments still to be processed
        noOfTextFragments = len([child for child in self if child.tail is not None])
        if self.text is not None:
            noOfTextFragments += 1

        rstrip = 0
        lstrip = 1
        if self.text is not None:
            if noOfTextFragments == 1:
                rstrip = 1
            self.text = processWhitespaceAction (self.text, wsAction, lstrip, rstrip)
            noOfTextFragments -= 1
            lstrip = 0
        for child in self:
            if child.tail is not None:
                if noOfTextFragments == 1:
                    rstrip = 1
                child.tail = processWhitespaceAction (child.tail, wsAction, lstrip, rstrip)
                noOfTextFragments -= 1
                lstrip = 0
00331
00332
00333
00334
00335
00336
class ExtXMLTreeBuilder (XMLTreeBuilder, XmlIfBuilderExtensionBase):
    """XMLTreeBuilder that creates ElementExtension nodes and reports every
    start/end tag to the handlers of XmlIfBuilderExtensionBase.

    The builder additionally keeps track of the namespace declarations
    reported by the parser in self.namespaces (a list of (prefix, uri)
    tuples, most recent declaration first).
    """

    def __init__(self, filePath, absUrl, xmlIf, treeWrapper):
        """Set up the tree builder, the namespace handlers and the extension base.

        filePath, absUrl and treeWrapper are passed on to
        XmlIfBuilderExtensionBase.__init__ together with
        xmlIf.elementWrapperClass.
        """
        XMLTreeBuilder.__init__(self, target=TreeBuilder(element_factory=ElementExtension))
        self._parser.StartNamespaceDeclHandler = self._start_ns
        self._parser.EndNamespaceDeclHandler = self._end_ns
        self.namespaces = []
        XmlIfBuilderExtensionBase.__init__(self, filePath, absUrl, treeWrapper, xmlIf.elementWrapperClass)

    def _start(self, tag, attrib_in):
        """Start-tag callback receiving the attributes as dictionary."""
        elem = XMLTreeBuilder._start(self, tag, attrib_in)
        # start() expects the flat [name, value, name, value, ...] layout
        # that _start_list() receives, so the dictionary is flattened first
        # (start() was previously called without attributes here, which
        # raised a TypeError).
        attributes = []
        for attrName, attrValue in attrib_in.items():
            attributes.append(attrName)
            attributes.append(attrValue)
        self.start(elem, attributes)

    def _start_list(self, tag, attrib_in):
        """Start-tag callback receiving the attributes as flat name/value list."""
        elem = XMLTreeBuilder._start_list(self, tag, attrib_in)
        self.start(elem, attrib_in)

    def _end(self, tag):
        """End-tag callback."""
        elem = XMLTreeBuilder._end(self, tag)
        self.end(elem)

    def _start_ns(self, prefix, value):
        """Record a namespace declaration at the front of self.namespaces."""
        self.namespaces.insert(0, (prefix, value))

    def _end_ns(self, prefix):
        """Remove the most recent namespace declaration, which must be for prefix."""
        # The pop must not be part of the assert expression: asserts are
        # removed when Python runs with -O, and then the declaration would
        # never be taken off the list.
        closedPrefix = self.namespaces.pop(0)[0]
        assert closedPrefix == prefix, "implementation confused"


    def start(self, element, attributes):
        """Handle a start tag for the newly created element.

        attributes is a flat list [name, value, name, value, ...]; it is
        modified in place (see below) and passed to the start element handler
        of the extension base together with the current line number and a
        copy of the active namespace declarations.
        """
        # Attribute names containing '}' but no leading '{' get the '{'
        # prepended so that they are in Clark notation.  Presumably the
        # parser reports qualified names as "namespace}localName" -- TODO
        # confirm.
        for i in range (0, len(attributes), 2):
            attrName = attributes[i]
            if attrName.find('}') != -1 and attrName[0] != "{":
                attributes[i] = '{' + attrName

        XmlIfBuilderExtensionBase.startElementHandler (self, element, self._parser.ErrorLineNumber, self.namespaces[:], attributes)
        if len(self._target._elem) > 1:
            # the element below the top of the builder's element stack is
            # the parent of the new element
            element.xmlIfExtSetParentNode (self._target._elem[-2])
        else:
            # root element: store the namespace declarations as attributes
            for namespace in self.namespaces:
                if namespace[1] is not None:
                    element.xmlIfExtElementWrapper.setAttribute((XMLNS_NAMESPACE, namespace[0]), namespace[1])


    def end(self, element):
        """Handle an end tag: forward element and line number to the extension base."""
        XmlIfBuilderExtensionBase.endElementHandler (self, element, self._parser.ErrorLineNumber)
00384
00385
00386
00387
00388
00389
class ExtXIncludeLoader:
    """Loader for ElementInclude.include() that resolves hrefs against a base URL."""

    def __init__(self, parser, baseUrl, ownerDoc):
        """Store the parse callable, the base URL and the owner document.

        parser is called as parser(href, baseUrl, ownerDoc) for XML includes
        and has to return an object offering getTree().getroot().
        """
        self.parser = parser
        self.baseUrl = baseUrl
        self.ownerDoc = ownerDoc

    def loader(self, href, parse, encoding=None):
        """Load the resource addressed by href.

        If parse is "xml" the resource is parsed with the stored parser and
        the root element of the resulting tree is returned.  Otherwise href
        is resolved against the base URL and the raw content is returned,
        decoded with encoding if one is given.
        """
        if parse == "xml":
            return self.parser(href, self.baseUrl, self.ownerDoc).getTree().getroot()

        absUrl = convertToAbsUrl (href, self.baseUrl)
        fp = urllib.urlopen (absUrl)
        try:
            data = fp.read()
        finally:
            # release the handle even if reading fails; decoding happens
            # afterwards so that a decode error cannot leak it either
            fp.close()
        if encoding:
            data = data.decode(encoding)
        return data