#!/usr/bin/env python
#
# Copyright 2015 Airbus
# Copyright 2017 Fraunhofer Institute for Manufacturing Engineering and Automation (IPA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""HTML tag/attribute name constants and small ElementTree helpers.

The constant classes group HTML names by category so callers can write
``Grouping.div`` instead of a bare string.  The helpers wrap
``xml.etree.ElementTree`` for building, loading and indenting documents.
"""

from xml.etree import ElementTree as ET
from xml.etree.ElementTree import tostring
from xml.etree.ElementTree import parse


class Attrib:
    """Names of commonly used element attributes."""
    id = 'id'
    type = 'type'
    href = 'href'


class Metadata:
    """Tag names of document-metadata elements."""
    head = "head"
    title = "title"
    base = "base"
    link = "link"
    meta = "meta"
    style = "style"


class Sections:
    """Tag names of sectioning and heading elements."""
    body = "body"
    article = "article"
    section = "section"
    nav = "nav"
    aside = "aside"
    h1 = "h1"
    h2 = "h2"
    h3 = "h3"
    h4 = "h4"
    h5 = "h5"
    h6 = "h6"
    header = "header"
    footer = "footer"
    address = "address"


class Grouping:
    """Tag names of content-grouping elements."""
    p = "p"
    pre = "pre"
    blockquote = "blockquote"
    ol = "ol"
    ul = "ul"
    li = "li"
    dl = "dl"
    dt = "dt"
    dd = "dd"
    figure = "figure"
    figcaption = "figcaption"
    div = "div"
    main = "main"
    hr = "hr"


class Text:
    """Tag names of text-level (inline) elements."""
    a = "a"
    em = "em"
    strong = "strong"
    cite = "cite"
    q = "q"
    dfn = "dfn"
    abbr = "abbr"
    data = "data"
    time = "time"
    code = "code"
    var = "var"
    samp = "samp"
    kbd = "kbd"
    mark = "mark"
    ruby = "ruby"
    rb = "rb"
    rt = "rt"
    rp = "rp"
    rtc = "rtc"
    bdi = "bdi"
    bdo = "bdo"
    span = "span"
    br = "br"
    wbr = "wbr"
    small = "small"
    i = "i"
    b = "b"
    u = "u"
    s = "s"
    sub = "sub"
    sup = "sup"


class Edits:
    """Tag names of the document-edit elements."""
    ins = "ins"
    # The attribute cannot be called ``del`` (reserved word), but the HTML
    # element it stands for is <del>; "delete" is not an HTML tag.
    delete = "del"


class EmbeddedContent:
    """Tag names of embedded-content elements."""
    img = "img"
    embed = "embed"
    object = "object"
    param = "param"
    video = "video"
    audio = "audio"
    source = "source"
    track = "track"
    map = "map"
    area = "area"
    iframe = "iframe"


class Tables:
    """Tag names of table elements."""
    table = "table"
    tr = "tr"
    td = "td"
    th = "th"
    caption = "caption"
    tbody = "tbody"
    thead = "thead"
    tfoot = "tfoot"
    colgroup = "colgroup"
    col = "col"


class Forms:
    """Tag names of form elements."""
    form = "form"
    input = "input"
    textarea = "textarea"
    select = "select"
    option = "option"
    optgroup = "optgroup"
    datalist = "datalist"
    label = "label"
    fieldset = "fieldset"
    legend = "legend"
    button = "button"
    output = "output"
    progress = "progress"
    meter = "meter"
    keygen = "keygen"


class Scripting:
    """Tag names of scripting elements."""
    script = "script"
    noscript = "noscript"
    template = "template"
    canvas = "canvas"


# Aliases so callers can build documents without importing ElementTree.
HtmlElementTree = ET.ElementTree
HtmlElement = ET.Element


def indent(elem, level=0):
    """Insert line breaks and indentation into an element tree, in place.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    rewritten; real text content is left untouched.

    :param elem: element whose subtree is to be indented.
    :param level: nesting depth of ``elem``; each level adds one space.
    """
    pad = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            # First child starts one level deeper than its parent.
            elem.text = pad + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        child = None
        for child in elem:
            indent(child, level + 1)
        # The last child is followed by the parent's closing tag, so its
        # tail must drop back to the parent's indentation.
        if not child.tail or not child.tail.strip():
            child.tail = pad
    else:
        # A childless root (level 0) gets no trailing whitespace.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad


def loadHtml(htmlfile):
    """Parse ``htmlfile`` and return the root element of the document.

    :param htmlfile: file name or file object accepted by
        ``xml.etree.ElementTree.parse``.
    :return: the root element of the parsed tree.
    """
    tree = parse(htmlfile)
    return tree.getroot()


class HTMLException(HtmlElement):
    """A ``<p id="exception">`` element whose text is ``str(ex)``."""

    def __init__(self, ex, parent=None):
        """Build the element and optionally attach it to ``parent``.

        :param ex: object (typically an exception) rendered with ``str``.
        :param parent: element to append this one to; skipped when ``None``.
        """
        HtmlElement.__init__(self, Grouping.p, attrib={"id": "exception"})
        self.text = str(ex)

        if parent is not None:
            parent.append(self)