xmldiff.py
Go to the documentation of this file.
1 ##########################################################################
2 #
3 # xmldiff
4 #
5 # Simple utility script to enable a diff of two XML files in a way
6 # that ignores the order of attributes and elements.
7 #
8 # Dale Lane (email@dalelane.co.uk)
9 # 6 Oct 2014
10 #
11 ##########################################################################
12 #
13 # Overview
14 # The approach is to sort both files by attribute and element, and
15 # then reuse an existing diff implementation on the sorted files.
16 #
17 # Arguments
18 # <diffcommand> the command that should be run to diff the sorted files
19 # <filename1> the first XML file to diff
20 # <filename2> the second XML file to diff
21 #
22 # Background
23 # http://dalelane.co.uk/blog/?p=3225
24 #
25 ##########################################################################
26 
27 import os
28 import sys
29 import subprocess
30 import platform
31 import lxml.etree as le
32 from operator import attrgetter
33 
34 
35 #
36 # Prepares the location of the temporary file that will be created by xmldiff
def createFileObj(prefix, name):
    """Describe an input file and the temporary file used for its sorted copy.

    Returns a dict with two entries:
      "filename"    - the absolute path of name
      "tmpfilename" - a dot-prefixed file name (no directory part) built
                      from prefix and the base name of name
    """
    source_path = os.path.abspath(name)
    scratch_name = ".".join(["", prefix, os.path.basename(name)])
    return {"filename": source_path, "tmpfilename": scratch_name}
42 
43 
44 #
45 # Function to sort XML elements by their text contents
def sortbytext(elem):
    """Sort key: the element's text, or '' when the text is missing or empty."""
    return elem.text or ''
52 
53 
def sortbytag(elem):
    """Sort key: the tag followed by the attribute values.

    The values are listed in alphabetical order of their attribute names;
    the names themselves are not part of the key.
    """
    return [elem.tag] + [elem.get(name) for name in sorted(elem.keys())]
59 
60 
61 #
62 # Function to sort XML attributes alphabetically by key
63 # The original item is left unmodified, and its attributes are
64 # copied to the provided sorteditem
def sortAttrs(item, sorteditem):
    """Copy the attributes of item onto sorteditem in alphabetical key order.

    item is only read (via keys() and get()); every attribute is written to
    sorteditem with set().
    """
    for name in sorted(item.keys()):
        value = item.get(name)
        sorteditem.set(name, value)
69 
70 
71 #
72 # Function to sort XML elements
73 # The sorted elements will be added as children of the provided newroot
74 # This is a recursive function, and will be called on each of the children
75 # of items.
def sortElements(items, newroot):
    """Append sorted copies of items (and, recursively, their children) to newroot.

    Ordering is produced by two stable sorts, least significant key first:
      1. by text content (see sortbytext)
      2. by tag name and attribute values (see sortbytag)
    so elements are grouped by tag/attribute values, and elements that tie
    on that key are ordered by their text. Elements that tie on both keys
    keep their original relative order.
    """
    ordered = sorted(items, key=sortbytext)
    ordered.sort(key=sortbytag)

    for original in ordered:
        # Build a fresh element carrying the same tag; text is copied only
        # when it is non-empty and not purely whitespace
        clone = le.Element(original.tag)
        text = original.text
        if text and not text.isspace():
            clone.text = text

        # Attributes go across in alphabetical key order
        sortAttrs(original, clone)

        # Recurse so the children of this element are sorted as well
        sortElements(list(original), clone)

        # Attach the finished copy to the sorted parent
        newroot.append(clone)
107 
108 
109 #
110 # Function to sort the provided XML file
111 # fileobj.filename will be left untouched
112 # A new sorted copy of it will be created at fileobj.tmpfilename
def sortFile(fileobj):
    """Write a sorted copy of an XML file.

    fileobj is a dict as returned by createFileObj. The file at
    fileobj['filename'] is only read; the sorted copy is written to
    fileobj['tmpfilename'].
    """
    # Read the input as bytes so the XML parser determines the encoding from
    # the document itself, rather than having Python decode the file with
    # the platform default encoding first.
    with open(fileobj['filename'], 'rb') as original:
        xmlroot = le.parse(original).getroot()

    # New root element that will be the top of the sorted copy
    newxmlroot = le.Element(xmlroot.tag)

    # Build the sorted copy: root attributes first, then all descendants
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)

    # Write the sorted tree to the temp file (input is already closed here)
    newtree = le.ElementTree(newxmlroot)
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        newtree.write(newfile, pretty_print=True)
131 
132 
133 #
134 # sort each of the specified files
def compareFiles(filename1, filename2):
    """Diff two XML files while ignoring attribute and element order.

    Both files are sorted into temporary files (see sortFile), the external
    'diff' command is run on the sorted copies, and the temporary files are
    removed again.

    Returns whatever 'diff' wrote to its standard output.
    """
    filefrom = createFileObj("from", filename1)
    fileto = createFileObj("to", filename2)

    # Resolve the temp file locations once, so the same absolute paths are
    # used for the diff and for cleanup on every platform
    tmppaths = [os.path.abspath(filefrom['tmpfilename']),
                os.path.abspath(fileto['tmpfilename'])]

    try:
        sortFile(filefrom)
        sortFile(fileto)

        # Pass the command as an argument list without a shell, so paths
        # containing spaces or shell metacharacters reach diff unchanged
        command = ["diff"] + tmppaths
        if platform.system() == "Windows":
            # keep resolving 'diff' through cmd, as before
            command = ["cmd", "/c"] + command

        sp = subprocess.Popen(command, stdout=subprocess.PIPE)
        stdout = sp.communicate()[0]
    finally:
        # cleanup - delete the temporary sorted files, even when sorting or
        # the diff itself failed part-way through
        for tmppath in tmppaths:
            if os.path.exists(tmppath):
                os.remove(tmppath)

    return stdout
def compareFiles(filename1, filename2)
Definition: xmldiff.py:135
def sortFile(fileobj)
Definition: xmldiff.py:113
def sortAttrs(item, sorteditem)
Definition: xmldiff.py:65
def createFileObj(prefix, name)
Definition: xmldiff.py:37
def sortbytext(elem)
Definition: xmldiff.py:46
def sortElements(items, newroot)
Definition: xmldiff.py:76
def sortbytag(elem)
Definition: xmldiff.py:54


naoqi_tools
Author(s): Mikael Arguedas
autogenerated on Thu Jul 16 2020 03:18:37