xmldiff.py
Go to the documentation of this file.
00001 ##########################################################################
00002 #
00003 #  xmldiff
00004 #
00005 #    Simple utility script to enable a diff of two XML files in a way 
00006 #     that ignores the order of attributes and elements.
00007 #
00008 #    Dale Lane (email@dalelane.co.uk)
00009 #     6 Oct 2014
00010 #
00011 ##########################################################################
00012 #
00013 #  Overview
00014 #    The approach is to sort both files by attribute and element, and 
00015 #     then reuse an existing diff implementation on the sorted files.
00016 #
00017 #  Arguments
00018 #    <diffcommand> the command that should be run to diff the sorted files
00019 #    <filename1>   the first XML file to diff
00020 #    <filename2>   the second XML file to diff
00021 #
00022 #  Background
00023 #    http://dalelane.co.uk/blog/?p=3225
00024 #
00025 ##########################################################################
00026 
00027 import os, sys, subprocess, platform
00028 import lxml.etree as le
00029 from operator import attrgetter
00030 
00031 #
00032 # Prepares the location of the temporary file that will be created by xmldiff
def createFileObj(prefix, name):
    """Describe an input file and the temporary sorted copy made for it.

    Returns a dict with two entries:
      "filename"    - the absolute path of ``name``
      "tmpfilename" - a relative name of the form ".<prefix>.<basename>",
                      where <basename> is the final path component of ``name``
    """
    source_path = os.path.abspath(name)
    # Built with join so the pieces are combined exactly as plain strings
    temp_name = "".join([".", prefix, ".", os.path.basename(name)])
    return {"filename": source_path, "tmpfilename": temp_name}
00038 
00039 
00040 #
00041 # Function to sort XML elements by their text contents
def sortbytext(elem):
    """Sort key: the element's text, or an empty string when it has none."""
    # A missing (None) or empty text both collapse to ''
    return elem.text or ''
00048 
def sortbytag(elem):
    """Sort key: the element's tag followed by its attribute values.

    The attribute values are listed in the alphabetical order of their
    attribute names; the names themselves are not part of the key.
    """
    return [elem.tag] + [elem.get(name) for name in sorted(elem.keys())]
00054 
00055 #
00056 # Function to sort XML attributes alphabetically by key
00057 #  The original item is left unmodified, and its attributes are 
00058 #  copied to the provided sorteditem
def sortAttrs(item, sorteditem):
    """Copy the attributes of item onto sorteditem in alphabetical key order.

    item is only read (via keys() and get()); every attribute is written
    to sorteditem with set().
    """
    for name in sorted(item.keys()):
        sorteditem.set(name, item.get(name))
00063 
00064 
00065 # 
00066 # Function to sort XML elements
00067 #  The sorted elements will be added as children of the provided newroot
00068 #  This is a recursive function, and will be called on each of the children
00069 #  of items.
def sortElements(items, newroot):
    """Append sorted copies of items, and of their descendants, to newroot.

    The elements are ordered by the key from sortbytag (tag name, then
    attribute values taken in attribute-name order).  Elements whose
    sortbytag keys are equal are ordered by the key from sortbytext (their
    text contents).  Elements that are equal on both keys keep their
    original relative order.

    The original items are not modified: a new element is created for
    each one, given the same tag, the same text (unless the text is empty
    or only whitespace) and the same attributes, and then filled by
    calling this function recursively on the item's children.

    Nodes that are not real elements (comments, processing instructions,
    entities) are left out of the sorted copy.
    """
    # lxml gives comments, processing instructions and entities a factory
    # function as their tag instead of a string.  Such a tag cannot be
    # passed to le.Element() and cannot be ordered against string tags,
    # so these nodes are skipped before sorting.
    elements = [item for item in items if not callable(item.tag)]

    # Two stable sorts applied in reverse order of priority: the
    # lower-priority key (text) first, the higher-priority key (tag and
    # attribute values) last.
    elements = sorted(elements, key=sortbytext)
    elements = sorted(elements, key=sortbytag)

    for item in elements:
        # Create a new item to represent the sorted version of this item,
        # copying the tag name and any non-blank text
        newitem = le.Element(item.tag)
        if item.text and not item.text.isspace():
            newitem.text = item.text

        # Copy the attributes (sorted by key) to the new item
        sortAttrs(item, newitem)

        # Copy the children of item (sorted) to the new item
        sortElements(list(item), newitem)

        # Append this sorted item to the sorted root
        newroot.append(newitem)
00101 
00102 
00103 # 
00104 # Function to sort the provided XML file
00105 #  fileobj.filename will be left untouched
00106 #  A new sorted copy of it will be created at fileobj.tmpfilename 
def sortFile(fileobj):
    """Write a sorted copy of an XML file to its temporary location.

    fileobj is a dict with the keys "filename" (the XML file to read,
    which is left untouched) and "tmpfilename" (where the sorted copy is
    written, pretty-printed).
    """
    # Open in binary mode so that the XML parser, rather than the
    # platform's default text encoding, decides how to decode the bytes.
    with open(fileobj['filename'], 'rb') as original:
        # parse the XML file and get a pointer to the top
        xmlroot = le.parse(original).getroot()

    # create a new XML element that will be the top of the sorted copy
    newxmlroot = le.Element(xmlroot.tag)

    # create the sorted copy of the XML file
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)

    # write the sorted XML file to the temp file
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        le.ElementTree(newxmlroot).write(newfile, pretty_print=True)
00125 
00126 
00127 #
00128 # sort each of the specified files
def compareFiles(filename1, filename2):
    """Diff two XML files while ignoring attribute and element order.

    A sorted copy of each file is written to a temporary file (see
    createFileObj and sortFile), the external ``diff`` command is run on
    the two sorted copies, and the temporary files are deleted again.

    Returns whatever ``diff`` wrote to its standard output.
    """
    filefrom = createFileObj("from", filename1)
    fileto = createFileObj("to", filename2)

    try:
        # sort each of the specified files
        sortFile(filefrom)
        sortFile(fileto)

        # invoke the diff command to compare the two sorted files.
        # The arguments are passed as a list, without a shell, so file
        # names containing spaces or shell metacharacters are handed to
        # diff unchanged.
        if platform.system() == "Windows":
            command = ["cmd", "/c", "diff",
                       filefrom['tmpfilename'], fileto['tmpfilename']]
        else:
            command = ["diff",
                       os.path.abspath(filefrom['tmpfilename']),
                       os.path.abspath(fileto['tmpfilename'])]
        sp = subprocess.Popen(command, stdout=subprocess.PIPE)
        return sp.communicate()[0]
    finally:
        # cleanup - delete the temporary sorted files, even when sorting
        # or diffing failed part-way (a file may not have been created)
        for tmpname in (filefrom['tmpfilename'], fileto['tmpfilename']):
            if os.path.exists(tmpname):
                os.remove(tmpname)


naoqi_tools
Author(s): Mikael Arguedas
autogenerated on Thu Aug 27 2015 14:05:48