00001
00002 import xml.dom.minidom as minidom
00003 from sys import exit, argv, stderr, stdout
00004 import re
00005 import argparse
00006
# Command line: one required input path and an optional output path.
# 'infile' uses nargs=1, so argparse stores it as a one-element list.
parser = argparse.ArgumentParser(description="Format XML")
parser.add_argument('infile', nargs=1)
parser.add_argument('outfile', nargs='?')

args = parser.parse_args()

# Read the document as raw bytes so that the XML parser itself determines the
# character encoding (from the BOM / XML declaration) instead of the bytes
# being decoded beforehand with the platform's default text encoding.
# The with-block closes the file even when read() raises.
with open(args.infile[0], 'rb') as f:
    text = f.read()

# Parse the whole document into a minidom tree; later passes mutate it in place.
dom = minidom.parseString(text)
00018
def contains_only_text(node):
    """Return True when every direct child of *node* is a text node.

    A node without any children also yields True, because no child
    contradicts the condition. Only direct children are inspected.
    """
    return all(kid.nodeType == kid.TEXT_NODE for kid in node.childNodes)
00025
def foreach_tree(doc, root, func, level=0):
    """Call *func* on *root* and then on every descendant, parents first.

    *func* is invoked as ``func(doc, node, level)``, where *level* is the
    depth of the node relative to the starting node (which is at *level*).
    Children are visited in document order.
    """
    func(doc, root, level)

    # Take the snapshot only after the callback ran on the parent, and iterate
    # the snapshot rather than the live list: callbacks insert and remove
    # siblings while the walk is in progress.
    snapshot = list(root.childNodes)
    child_level = level + 1
    for child in snapshot:
        foreach_tree(doc, child, func, child_level)
00032
def strip_indent(doc, node, level):
    """Remove *node* from the tree if it is a whitespace-only text node.

    Nodes of any other type, and text nodes holding any non-whitespace
    character, are left alone. *doc* and *level* are unused; they are part
    of the callback signature expected by foreach_tree.
    """
    if node.nodeType != node.TEXT_NODE:
        return
    if re.match(r"^\s+$", node.nodeValue) is None:
        return
    # Detach from the parent first, then release the node's references.
    node.parentNode.removeChild(node)
    node.unlink()
00037
def strip_comment_whitespace(doc, node, level):
    """Collapse each run of whitespace inside a comment node to one space.

    Leading and trailing whitespace is collapsed as well, not removed.
    Non-comment nodes are ignored. *doc* and *level* are unused; they are
    part of the callback signature expected by foreach_tree.
    """
    if node.nodeType != node.COMMENT_NODE:
        return
    collapsed = re.sub(r"\s+", " ", node.nodeValue)
    node.nodeValue = collapsed
00041
def strip_comments_completely(doc, node, level):
    """Remove *node* from the tree if it is a comment node.

    Other node types are ignored. *doc* and *level* are unused; they are
    part of the callback signature expected by foreach_tree.
    """
    if node.nodeType != node.COMMENT_NODE:
        return
    owner = node.parentNode
    owner.removeChild(node)
    node.unlink()
00046
def strip_text_whitespace(doc, node, level):
    """Normalise the whitespace of a text node.

    Every run of whitespace becomes a single space, then leading and
    trailing whitespace is dropped. Non-text nodes are ignored. *doc* and
    *level* are unused; they are part of the callback signature expected by
    foreach_tree.
    """
    if node.nodeType != node.TEXT_NODE:
        return
    single_spaced = re.sub(r"\s+", " ", node.nodeValue)
    node.nodeValue = single_spaced.strip()
00050
def strip_text_completely(doc, node, level):
    """Remove *node* from the tree if it is a text node, whatever it holds.

    Other node types are ignored. *doc* and *level* are unused; they are
    part of the callback signature expected by foreach_tree.
    """
    if node.nodeType != node.TEXT_NODE:
        return
    owner = node.parentNode
    owner.removeChild(node)
    node.unlink()
00055
def auto_indent(doc, node, level):
    """Place *node* on its own line, indented four spaces per nesting level.

    A newline plus indentation is inserted as a text node in front of
    *node*. When *node* is the last child, a second text node (newline plus
    the parent's indentation) is appended so that the parent's closing tag
    lines up with its opening tag.

    Nothing happens for the starting node (level 0) or when the parent
    contains nothing but text, so plain text content is left inline.
    """
    if level <= 0:
        return
    parent = node.parentNode
    if contains_only_text(parent):
        return

    leading = "\n%s" % (" "*4*level)
    parent.insertBefore(doc.createTextNode(leading), node)

    if node.nextSibling is None:
        # Last child: indent the parent's closing tag one level shallower.
        trailing = "\n%s" % (" "*4*(level-1))
        parent.appendChild(doc.createTextNode(trailing))
00061
def next_non_text_sibling(node):
    """Return the nearest following sibling of *node* that is not a text node.

    Returns None when only text nodes (or nothing at all) follow *node*.
    """
    candidate = node.nextSibling
    while candidate is not None:
        if candidate.nodeType != node.TEXT_NODE:
            return candidate
        candidate = candidate.nextSibling
    return None
00067
def auto_space(doc, node, level):
    """Insert a blank line after *node* when it is a multi-child block.

    A text node holding a single newline is inserted directly after *node*
    if all of the following hold: *node* is below the starting node
    (level > 0), it has more than one child, and some non-text sibling
    follows it. Otherwise the tree is left unchanged.
    """
    if level <= 0:
        return
    children = node.childNodes
    if children is None or len(children) <= 1:
        return
    if next_non_text_sibling(node) is None:
        return
    separator = doc.createTextNode("\n")
    node.parentNode.insertBefore(separator, node.nextSibling)
00071
# Formatting pipeline. Each pass is a full walk over the tree, and the order
# matters: existing indentation and redundant whitespace are removed first,
# then fresh indentation and blank-line separators are inserted.
for _tree_pass in (
    strip_indent,
    strip_comment_whitespace,
    strip_text_whitespace,
    auto_indent,
    auto_space,
):
    foreach_tree(dom, dom.documentElement, _tree_pass)
00077
# Serialise before touching the destination, so a failure while building the
# output cannot leave behind an already-truncated output file.
output = "<?xml version='1.0'?>\n" + dom.documentElement.toxml() + "\n"

if args.outfile is not None:
    # Mode 'w' already truncates the file, so no explicit truncate() is
    # needed; the with-block closes the file even if the write fails.
    with open(args.outfile, 'w') as f:
        f.write(output)
else:
    # Write to standard output but do not close it: the stream belongs to the
    # interpreter, not to this script. Flush so the output is pushed out here.
    stdout.write(output)
    stdout.flush()