xmldiff.py
Go to the documentation of this file.
00001 ##########################################################################
00002 #
00003 #  xmldiff
00004 #
00005 #    Simple utility script to enable a diff of two XML files in a way
00006 #     that ignores the order of attributes and elements.
00007 #
00008 #    Dale Lane (email@dalelane.co.uk)
00009 #     6 Oct 2014
00010 #
00011 ##########################################################################
00012 #
00013 #  Overview
00014 #    The approach is to sort both files by attribute and element, and
00015 #     then reuse an existing diff implementation on the sorted files.
00016 #
00017 #  Arguments
00018 #    <diffcommand> the command that should be run to diff the sorted files
00019 #    <filename1>   the first XML file to diff
00020 #    <filename2>   the second XML file to diff
00021 #
00022 #  Background
00023 #    http://dalelane.co.uk/blog/?p=3225
00024 #
00025 ##########################################################################
00026 
00027 import os
00028 import sys
00029 import subprocess
00030 import platform
00031 import lxml.etree as le
00032 from operator import attrgetter
00033 
00034 
00035 #
00036 # Prepares the location of the temporary file that will be created by xmldiff
def createFileObj(prefix, name):
    """Describe an input file and the temporary file for its sorted copy.

    Returns a dict with two keys:
      "filename"    - the absolute path of `name`
      "tmpfilename" - the relative name ".<prefix>.<basename of name>"
                      under which the sorted copy will be written
    """
    source_path = os.path.abspath(name)
    scratch_name = ".".join(["", prefix, os.path.basename(name)])
    return {"filename": source_path, "tmpfilename": scratch_name}
00042 
00043 
00044 #
00045 # Function to sort XML elements by their text contents
def sortbytext(elem):
    """Return the sort key of an XML element based on its text contents.

    The key is the element's text, or '' when the element has no text.
    Whitespace-only text (typically the indentation that precedes an
    element's first child) also maps to '': sortElements drops such text
    from the sorted output, so it must not influence the ordering either,
    otherwise two files that differ only in formatting could be sorted
    differently.
    """
    text = elem.text
    if text and not text.isspace():
        return text
    return ''
00052 
00053 
def sortbytag(elem):
    """Return the sort key of an XML element based on its tag and attributes.

    The key is a list that starts with the element's tag, followed by the
    values of its attributes taken in alphabetical order of attribute name.
    """
    return [elem.tag] + [elem.get(name) for name in sorted(elem.keys())]
00059 
00060 
00061 #
00062 # Function to sort XML attributes alphabetically by key
00063 # The original item is left unmodified, and its attributes are
00064 # copied to the provided sorteditem
def sortAttrs(item, sorteditem):
    """Copy the attributes of item onto sorteditem in alphabetical key order.

    item is only read (through keys() and get()); every attribute is
    written to sorteditem with set().
    """
    ordered_pairs = [(name, item.get(name)) for name in sorted(item.keys())]
    for name, value in ordered_pairs:
        sorteditem.set(name, value)
00069 
00070 
00071 #
00072 # Function to sort XML elements
00073 #  The sorted elements will be added as children of the provided newroot
00074 #  This is a recursive function, and will be called on each of the children
00075 #  of items.
def sortElements(items, newroot):
    """Append sorted copies of the given XML elements to newroot.

    items   -- the child nodes to sort (they are not modified)
    newroot -- the element that receives the sorted copies

    Sort order:
      1. by element tag, then by the attribute values taken in
         alphabetical order of attribute name (see sortbytag);
      2. elements that tie on tag and attributes are ordered by their
         text contents (see sortbytext);
      3. elements that tie on all of the above keep their original
         relative order.

    This is achieved with two stable sorts applied in reverse order of
    priority: first by text, then by tag/attributes.

    The function is recursive: the children of every item are sorted the
    same way and attached to that item's copy.
    """
    # lxml also yields comments and processing instructions as children;
    # their .tag is a factory function rather than a name, so they can
    # neither be compared with real tags nor be passed to le.Element().
    # They carry no element data, so they are left out of the sorted copy.
    elements = [item for item in items if not callable(item.tag)]

    # Least significant key first; sorted() is stable, so the second sort
    # preserves the text ordering among elements with equal tag/attributes.
    elements = sorted(elements, key=sortbytext)
    elements = sorted(elements, key=sortbytag)

    for item in elements:
        # Create a new item to represent the sorted version of this item,
        # copying the tag name and any text that is not pure whitespace
        # (whitespace-only text is just formatting).
        newitem = le.Element(item.tag)
        if item.text and not item.text.isspace():
            newitem.text = item.text

        # Copy the attributes (sorted by key) to the new item
        sortAttrs(item, newitem)

        # Copy the children of item (sorted) to the new item
        sortElements(list(item), newitem)

        # Append this sorted item to the sorted root
        newroot.append(newitem)
00107 
00108 
00109 #
00110 # Function to sort the provided XML file
00111 #  fileobj.filename will be left untouched
00112 #  A new sorted copy of it will be created at fileobj.tmpfilename
def sortFile(fileobj):
    """Write a sorted copy of an XML file to a temporary file.

    fileobj -- dict as returned by createFileObj; the file at
               fileobj['filename'] is read and left untouched, and the
               sorted copy is written to fileobj['tmpfilename'].
    """
    # Read in binary mode so that the XML parser decodes the document
    # according to its own encoding declaration instead of the platform's
    # default text encoding.
    with open(fileobj['filename'], 'rb') as original:
        xmldoc = le.parse(original)
    xmlroot = xmldoc.getroot()

    # create a new XML element that will be the top of
    #  the sorted copy of the XML file
    newxmlroot = le.Element(xmlroot.tag)

    # keep the root's own text, using the same rule sortElements applies
    #  to every other element (whitespace-only text is formatting)
    if xmlroot.text and not xmlroot.text.isspace():
        newxmlroot.text = xmlroot.text

    # create the sorted copy of the XML file
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)

    # write the sorted XML file to the temp file
    newtree = le.ElementTree(newxmlroot)
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        newtree.write(newfile, pretty_print=True)
00131 
00132 
00133 #
00134 # sort each of the specified files
def compareFiles(filename1, filename2):
    """Diff two XML files, ignoring the order of attributes and elements.

    Both files are first rewritten as sorted temporary copies (see
    sortFile); the external `diff` command is then run on those copies.

    filename1 -- the first XML file to diff
    filename2 -- the second XML file to diff

    Returns whatever `diff` wrote to its standard output, as captured by
    subprocess (a byte string on Python 3).

    The temporary copies are removed before returning, including when
    sorting or diffing raises an exception.
    """
    filefrom = createFileObj("from", filename1)
    fileto = createFileObj("to", filename2)

    try:
        # sort each of the specified files
        sortFile(filefrom)
        sortFile(fileto)

        # Invoke diff on the two sorted files.  The command is passed as
        # an argument list without a shell, so file names containing
        # spaces or shell metacharacters are handed over verbatim.
        if platform.system() == "Windows":
            command = ["cmd", "/c", "diff",
                       filefrom['tmpfilename'],
                       fileto['tmpfilename']]
        else:
            command = ["diff",
                       os.path.abspath(filefrom['tmpfilename']),
                       os.path.abspath(fileto['tmpfilename'])]
        sp = subprocess.Popen(command, stdout=subprocess.PIPE)
        stdout = sp.communicate()[0]
    finally:
        # cleanup - delete whichever temporary sorted files were created
        for fileobj in (filefrom, fileto):
            if os.path.exists(fileobj['tmpfilename']):
                os.remove(fileobj['tmpfilename'])

    return stdout


naoqi_tools
Author(s): Mikael Arguedas
autogenerated on Wed Aug 16 2017 02:28:16