#!/neo/opt/bin/python
"""Extract translatable strings from templates and manage their translations.

Without --load the script scans the template tree (or one file given with
-f), records every candidate string and its location, writes the strings to
HDF files, and regenerates the templates under <root>/gen with each string
replaced by a ClearSilver variable reference.

With --load=FILE the script reads translated strings from an HDF file and
stores them in the translation database for the language given by --lang.
"""

import errno
import fcntl
import getopt
import os
import pwd
import re
import shutil
import signal
import string
import sys
import time

# NOTE(review): tstart is imported before the other project modules in the
# original file; presumably it prepares the import path -- keep it first.
import tstart

import db_trans
from log import *
import neo_cgi
import neo_util
import odb


class eTransError(Exception):
    """Base error for the translation tool.

    This used to be a string exception; it is now a class so it can still be
    raised on interpreters that reject string exceptions.
    """


class eNoString(eTransError):
    """Raised by Translator.fetchString when a string id cannot be found."""


DONE = 0
DEBUG = 0

# Divisors used to spread string ids over a two-level HDF hierarchy
# (see _tieredName).
TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

if not DEBUG:
    LOGGING_STATUS[DEV_UPDATE] = 0


def handleSignal(*arg):
    """Signal handler: set the module-level DONE flag."""
    global DONE
    DONE = 1


def usage(progname=None):
    """Print a one-line usage summary.

    progname -- program name to show; defaults to sys.argv[0].  The parameter
                is optional so both usage() and usage(argv[0]) work.
    """
    if progname is None:
        progname = sys.argv[0]
    print("usage: %s [--help] [-f file] [-v level] [--load=file] [--lang=lang]"
          % progname)


def exceptionString():
    """Return the traceback of the exception being handled as a string."""
    import traceback
    return traceback.format_exc()


def _tieredName(prefix, s_id):
    """Return the tiered HDF name '<prefix>.<id/121>.<id/11>.<id>' for s_id."""
    n = int(s_id)
    return "%s.%d.%d.%s" % (prefix, n // TIER1_DIV, n // TIER2_DIV, s_id)


def _ensureDir(path):
    """Create directory `path` (and its parents) unless it already exists."""
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError as reason:
        if reason.errno != errno.EEXIST:
            raise


def _readFile(path):
    """Return the whole content of the file at `path`, closing it afterwards."""
    fp = open(path, 'r')
    try:
        return fp.read()
    finally:
        fp.close()


def _writeFile(path, data):
    """Write `data` to the file at `path`, closing it afterwards."""
    fp = open(path, 'w')
    try:
        fp.write(data)
    finally:
        fp.close()


class TransLoc:
    """One occurrence of a string: its id, its file, and where in the file.

    `location` is either "ofs:<offset>:<length>" for template text or
    "hdf:<name>" for an HDF element (see Translator.loadStrings).
    """

    def __init__(self, string_id, filename, location):
        self.string_id = string_id
        self.filename = filename
        self.location = location


class Translator:
    """Finds translatable strings in templates and maps them to translations."""

    _HTML_TAG_REGEX = '<[^!][^>]*?>'
    _HTML_TAG_RE = re.compile(_HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
    _HTML_CMT_REGEX = '<!--.*?-->'
    _HTML_CMT_RE = re.compile(_HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
    _CS_TAG_REGEX = '<\\?.+?\\?>'
    _CS_TAG_RE = re.compile(_CS_TAG_REGEX, re.MULTILINE | re.DOTALL)

    # One attribute inside a tag: name, optionally followed by =value where
    # the value is single-quoted, double-quoted or bare.
    _ATTR_RE = re.compile(
        r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
        r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')

    # Entities decoded before deciding whether a string contains words.
    # '&amp;' must stay last so freshly produced '&' are not decoded twice.
    # NOTE(review): the recovered listing showed these names already decoded
    # (each replace was a no-op); the entity spellings were restored here --
    # confirm against the upstream file.
    _HTML_ENTITIES = (
        ('&nbsp;', ' '),
        ('&quot;', '"'),
        ('&copy;', ''),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&amp;', '&'),
    )

    # An ASCII letter or digit; a string without one is not worth translating.
    _WORD_CHAR_RE = re.compile('[0-9A-Za-z]')

    # Values of self._html_state: what construct the previous chunk ended in.
    _STATE_TEXT = 0
    _STATE_TAG = 1
    _STATE_COMMENT = 2

    def __init__(self):
        self.tdb = db_trans.trans_connect()

        # configuration data ......
        #  - we should stop hardcoding this... - jeske

        self.root = "testroot"
        self.languages = ['es', 'en']

        self.ignore_paths = ['tmpl/m']            # common place for mockups
        self.ignore_files = ['blah_ignore.cs']    # ignore clearsilver file

        # ignore clearsilver javascript files
        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']

        # ............................

        if self.root is None:
            raise eTransError("Unable to determine installation root")

        self._html_state = Translator._STATE_TEXT

    def parseHTMLTag(self, data):
        """Extract translatable attribute values from the inside of one tag.

        data -- the text between '<' and '>' of a complete tag.

        Returns a list of (string, offset, 1) tuples, offsets relative to
        `data`.  Only the `value` of <input type="submit"> and
        <input type="button"> is reported.
        """
        # this is only called if we see a full tag in one parse...
        if len(data) == 0:
            return []
        if data[0] in '/?':
            return []
        i = 0
        while i < len(data) and data[i] not in ' \n\r\t>':
            i = i + 1
        if i == len(data):
            return []       # tag name only, no attributes
        tag = data[:i].lower()

        attrs = {}
        attrs_beg = {}
        k = i
        while k < len(data):
            match = Translator._ATTR_RE.match(data, k)
            if not match:
                break
            attrname, rest, attrvalue = match.group(1, 2, 3)
            if not rest:
                attrvalue = attrname
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                    attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            attrname = attrname.lower()
            if attrname in attrs:
                log("Can't handle duplicate attrs: %s" % attrname)
            attrs[attrname] = attrvalue
            attrs_beg[attrname] = match.start(3)
            k = match.end(0)

        results = []
        if tag == "input" and \
                attrs.get('type', "").lower() in ["submit", "button"]:
            value = attrs.get('value', '')
            if value:
                start = attrs_beg.get('value', 0)
                pos = data.find(value, start)
                if pos != -1:
                    results.append((value, pos, 1))
        return results

    def _trimmedText(self, data, start, end):
        """Return (text, offset, 1) for data[start:end] with edges trimmed.

        Surrounding whitespace is skipped, but at least one character is kept
        when the span is not empty; callers filter blank strings later.
        """
        x = start
        y = end - 1
        while x < y and data[x] in string.whitespace:
            x += 1
        while y > x and data[y] in string.whitespace:
            y -= 1
        return (data[x:y + 1], x, 1)

    def parseHTML(self, data, reset=1):
        """Split HTML into text runs and translatable attribute values.

        data  -- a chunk of HTML text.
        reset -- when true, forget any partial tag/comment left over from the
                 previous call; pass 0 to continue parsing a document that
                 was split into several chunks.

        Returns a list of (string, offset, 1) tuples, offsets relative to
        `data`.  If the chunk ends inside a tag or comment this is remembered
        in self._html_state so the next chunk can skip the remainder.
        """
        if reset:
            self._html_state = Translator._STATE_TEXT
        if DEBUG:
            print("- %d ---------\n%s\n- E ---------"
                  % (self._html_state, data))

        results = []
        i = 0
        n = len(data)

        # if we had state from the last parse... find it
        if self._html_state:
            if self._html_state == Translator._STATE_COMMENT:
                terminator = '-->'
            else:
                terminator = '>'
            end = data.find(terminator)
            if end == -1:
                return results
            i = end + len(terminator)
            self._html_state = Translator._STATE_TEXT

        while i < n:
            if DEBUG:
                print("MATCHING>%s<MATCHING" % data[i:])
            tag_b = data.find('<', i)
            if tag_b == -1:
                # nothing but text left
                results.append(self._trimmedText(data, i, n))
                break

            results.append(self._trimmedText(data, i, tag_b))

            # The terminator is searched from the opener, not from `i`:
            # a stray '>' or '-->' in the preceding text must not be taken
            # for the end of this construct.
            if data.startswith('<!--', tag_b):
                cmt_e = data.find('-->', tag_b)
                if cmt_e == -1:     # partial comment
                    self._html_state = Translator._STATE_COMMENT
                    break
                i = cmt_e + 3
            else:
                tag_e = data.find('>', tag_b)
                if tag_e == -1:     # partial tag
                    self._html_state = Translator._STATE_TAG
                    break
                base = tag_b + 1
                for (s, ofs, ishtml) in self.parseHTMLTag(data[base:tag_e]):
                    results.append((s, ofs + base, ishtml))
                i = tag_e + 1
        return results

    def parseCS(self, data):
        """Extract strings from a ClearSilver template.

        The text between <? ... ?> tags is handed to parseHTML piece by
        piece, carrying the HTML parse state across the pieces so that a
        ClearSilver tag inside an HTML tag does not confuse the parser.

        Returns a list of (string, offset, 1) tuples, offsets relative to
        `data`.
        """
        segments = []
        i = 0
        n = len(data)
        while i < n:
            m = Translator._CS_TAG_RE.search(data, i)
            if not m:
                # No complete tag left; stop at a partial one if present.
                x = data.find('<?', i)
                if x == -1:
                    segments.append((data[i:], i))
                else:
                    segments.append((data[i:x], i))
                break
            (b, e) = m.span()
            if i != b:
                segments.append((data[i:b], i))
            i = e

        # Start every file from a clean HTML state; it is then carried
        # across the segments of this file (reset=0 below).
        self._html_state = Translator._STATE_TEXT
        results = []
        for (s, ofs) in segments:
            for (text, pos, ishtml) in self.parseHTML(s, reset=0):
                results.append((text, pos + ofs, ishtml))
        return results

    def descendHDF(self, obj, prefix):
        """Collect the values of all HDF nodes carrying a `Lang` attribute.

        obj    -- first node of a sibling chain (or None).
        prefix -- dotted name of the parent, "" at the top level.

        Returns a list of (value, dotted_name, 0) tuples.
        """
        results = []
        while obj is not None:
            if prefix:
                name = "%s.%s" % (prefix, obj.name())
            else:
                name = obj.name()
            if obj.value():
                # assumes attrs() yields (name, value) pairs -- TODO confirm
                attr_names = [attr[0] for attr in obj.attrs()]
                if "Lang" in attr_names:
                    results.append((obj.value(), "%s" % name, 0))
            if obj.child():
                results = results + self.descendHDF(obj.child(), name)
            obj = obj.next()
        return results

    def parseHDF(self, data):
        """Extract strings from HDF text.

        Ok, we handle HDF files specially.. the theory is, we only extract
        entire HDF elements which have the attribute Lang.
        """
        hdf = neo_util.HDF()
        hdf.readString(data, 1)
        return self.descendHDF(hdf, "")

    def handleFile(self, file):
        """Return the strings found in one file (path relative to the root).

        Ignored files, files without an extension and files of an unknown
        type yield [].  .cs/.cst files are parsed as ClearSilver, .html/.htm
        as HTML and .hdf as HDF.
        """
        if file in self.ignore_files:
            return []
        for a_re in self.ignore_patterns:
            if re.match(a_re, file):
                return []
        x = file.rfind('.')
        if x == -1:
            return []
        ext = file[x:]
        if ext in ['.cst', '.cs']:
            parser = self.parseCS
        elif ext in ['.html', '.htm']:
            parser = self.parseHTML
        elif ext in ['.hdf']:
            parser = self.parseHDF
        else:
            return []       # unknown type: do not even read the file

        strings = parser(_readFile(self.root + '/' + file))
        if len(strings):
            print("Found %d strings in %s" % (len(strings), file))
            return strings
        return []

    def walkDirectory(self, path):
        """Recursively collect strings from every file below `path`.

        Returns a list of (relative_filename, strings) pairs.  Dot files,
        paths listed in self.ignore_paths and directories named "release"
        are skipped.
        """
        if path in self.ignore_paths:
            return []
        fpath = self.root + '/' + path
        subdirs = []
        results = []
        for entry in os.listdir(fpath):
            if entry[0] == '.':
                continue
            if os.path.isdir(fpath + '/' + entry):
                subdirs.append(entry)
            else:
                strings = self.handleFile(path + '/' + entry)
                if len(strings):
                    results.append((path + '/' + entry, strings))
        for subdir in subdirs:
            if subdir not in ["release"]:
                results = results + self.walkDirectory(path + '/' + subdir)
        return results

    def cleanHtmlString(self, s):
        """Collapse every whitespace run to one space and strip the ends."""
        return re.sub(r"\s+", " ", s).strip()

    def containsWords(self, s, ishtml):
        """Return 1 if `s` contains an ASCII letter or digit, else 0.

        For HTML strings a few common entities are decoded first so that,
        for example, a lone "&nbsp;" is not mistaken for a word.
        """
        if ishtml:
            for (entity, plain) in Translator._HTML_ENTITIES:
                s = s.replace(entity, plain)
        if Translator._WORD_CHAR_RE.search(s):
            return 1
        return 0

    def findString(self, s):
        """Return the database id of string `s`, inserting it if needed.

        Raises eTransError if the string is stored more than once.
        """
        rows = self.tdb.strings.fetchRows(('string', s))
        if len(rows) == 0:
            row = self.tdb.strings.newRow()
            row.string = s
            row.save()
            return row.string_id
        elif len(rows) > 1:
            raise eTransError("String %s exists multiple times!" % s)
        else:
            return rows[0].string_id

    def loadStrings(self, one_file=None, verbose=0):
        """Scan templates, store the strings and return their locations.

        one_file -- scan only this file (relative to the root) instead of
                    the whole 'tmpl' tree.
        verbose  -- accepted for compatibility; not used.

        Writes a human-readable summary to the file "map" in the current
        directory and returns a list of TransLoc objects.
        """
        if one_file is not None:
            results = [(one_file, self.handleFile(one_file))]
        else:
            results = self.walkDirectory('tmpl')

        uniq = {}           # string -> [(filename, offset-or-hdf-name, length)]
        cnt = 0
        seen_hdf = {}       # hdf name -> (string, filename)
        for (fname, strings) in results:
            for (s, ofs, ishtml) in strings:
                if not (s and s.strip()):
                    continue
                # length of the raw text in the file, before cleaning
                l = len(s)
                if ishtml:
                    s = self.cleanHtmlString(s)
                if not self.containsWords(s, ishtml):
                    continue
                if isinstance(ofs, str):    # HDF
                    if ofs in seen_hdf:
                        if seen_hdf[ofs][0] != s:
                            log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
                    else:
                        seen_hdf[ofs] = (s, fname)
                uniq.setdefault(s, []).append((fname, ofs, l))
                cnt = cnt + 1
        print("%d strings, %d unique" % (cnt, len(uniq.keys())))

        fp = open("map", 'w')
        try:
            for (s, locs) in uniq.items():
                refs = ["%s:%s:%d" % loc for loc in locs]
                fp.write('#: %s\n' % (','.join(refs)))
                fp.write('msgid=%s\n\n' % repr(s))
        finally:
            fp.close()

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in uniq.items():
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if isinstance(ofs, str):    # ie, its HDF
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                locations.append(TransLoc(s_id, fname, location))
        return locations

    def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
        """Build an HDF dataset of the strings referenced by `locations`.

        prefix    -- HDF name under which the strings are stored.
        locations -- list of TransLoc objects; sorted in place by string id.
        lang      -- language whose translations are preferred; strings
                     without a translation fall back to the original text.
        exist     -- when true, include only strings that have NO
                     translation for `lang` (always empty for 'en').
        tiered    -- when true, use the tiered naming of _tieredName instead
                     of '<prefix>.<id>'.

        Returns the neo_util.HDF object.
        """
        hdf = neo_util.HDF()
        if exist and lang == 'en':
            return hdf

        # Explicit key: instances have no useful natural order.
        locations.sort(key=lambda loc: (int(loc.string_id), loc.filename,
                                        loc.location))

        maps_d = {}
        for map_row in self.tdb.maps.fetchRows(('lang', lang)):
            maps_d[int(map_row.string_id)] = map_row
        strings_d = {}
        for string_row in self.tdb.strings.fetchRows():
            strings_d[int(string_row.string_id)] = string_row

        done = {}
        count = 0
        for loc in locations:
            s_id = int(loc.string_id)
            if s_id in done:
                continue
            if s_id in maps_d:
                if exist:
                    continue        # translated: not "missing"
                s_row = maps_d[s_id]
            elif s_id in strings_d:
                s_row = strings_d[s_id]
            else:
                log("Missing string_id %d, skipping" % s_id)
                continue
            count = count + 1
            if tiered:
                hdf.setValue(_tieredName(prefix, s_id), s_row.string)
            else:
                hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
            done[s_id] = 1
        if exist == 1:
            log("Missing %d strings for lang %s" % (count, lang))
        return hdf

    def dumpStrings(self, locations, lang=None):
        """Write strings_<lang>.hdf and strings_missing_<lang>.hdf files.

        lang -- a single language, or None for 'en' plus every language
                present in the nt_trans_maps table.
        """
        log("Dumping strings to HDF")
        if lang is None:
            langs = ['en']
            sql = "select lang from nt_trans_maps group by lang"
            cursor = self.tdb.defaultCursor()
            cursor.execute(sql)
            for row in cursor.fetchall():
                if row[0] not in langs:     # do not dump 'en' twice
                    langs.append(row[0])
        else:
            langs = [lang]

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang)
            hdf.writeFile("strings_%s.hdf" % a_lang)

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
            if hdf.child():
                hdf.writeFile("strings_missing_%s.hdf" % a_lang)

    def fetchString(self, s_id, lang):
        """Return the text to emit for string `s_id` in language `lang`.

        For the pseudo language "hdf" this is a ClearSilver variable
        reference; otherwise the translation, or the original string when no
        translation exists.

        Raises eNoString if the id is not in the strings table.
        """
        if lang == "hdf":
            return "<?cs var:%s ?>" % _tieredName('Lang.Extracted', s_id)
        rows = self.tdb.maps.fetchRows([('string_id', s_id), ('lang', lang)])
        if len(rows) == 0:
            try:
                row = self.tdb.strings.fetchRow(('string_id', s_id))
            except odb.eNoMatchingRows:
                log("Unable to find string id %s" % s_id)
                raise eNoString("Unable to find string id %s" % s_id)
            if lang != 'en':
                log("Untranslated string for id %s" % s_id)
            return row.string
        else:
            return rows[0].string

    def _substituteStrings(self, fname, data, ofs, lang):
        """Return `data` with every (offset, length, id) span replaced.

        ofs must be sorted by offset.  Processing stops at the first span
        that overlaps the previous one; the rest of the file is copied
        unchanged.
        """
        out = []
        x = 0
        for (start, length, s_id) in ofs:
            if start < x:
                log("How did we get here? %s x=%d ofs=%d sid=%s"
                    % (fname, x, start, s_id))
                log("Data[x:20]: %s" % data[x:x + 20])
                log("Data[ofs:20]: %s" % data[start:start + 20])
                break
            if start > x:
                out.append(data[x:start])
            out.append(self.fetchString(s_id, lang))
            x = start + length
        if len(data) > x:
            out.append(data[x:])
        return ''.join(out)

    def dumpFiles(self, locations, lang):
        """Regenerate the templates under <root>/gen for language `lang`.

        Every non-HDF template mentioned in `locations` is copied to
        <root>/gen/<file> with its strings replaced by fetchString().  Then,
        for each language (all of self.languages when lang is "hdf"), the
        extracted strings are written to gen/tmpl/lang_<lang>.hdf and the
        strings that came from HDF files to a lang_map.hdf file.
        """
        log("Dumping files for %s" % lang)
        files = {}
        for row in locations:
            files.setdefault(row.filename, []).append(row)

        hdf_map = []        # (hdf name, string id) for strings from .hdf files

        shutil.rmtree("%s/gen/tmpl" % (self.root), ignore_errors=True)
        for fname in files.keys():
            out_name = "%s/gen/%s" % (self.root, fname)
            _ensureDir(os.path.dirname(out_name))

            x = fname.rfind('.')
            do_hdf = (x != -1 and fname[x:] == '.hdf')

            ofs = []
            for loc in files[fname]:
                parts = loc.location.split(':')
                if len(parts) == 3 and parts[0] == 'ofs' and not do_hdf:
                    ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and do_hdf:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    log("Invalid location %s for string_id %s in %s"
                        % (loc.location, loc.string_id, fname))
                    continue

            if not do_hdf:
                ofs.sort()
                data = _readFile(self.root + '/' + fname)
                # ok, now we split up the original data into sections
                _writeFile(out_name,
                           self._substituteStrings(fname, data, ofs, lang))

        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
            # dumping the extracted strings
            hdf = self.stringsHDF('Lang.Extracted', locations, d_lang,
                                  tiered=1)
            lang_file = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
            # gen/tmpl was removed above and may not have been re-created
            _ensureDir(os.path.dirname(lang_file))
            hdf.writeFile(lang_file)
            # wrap the dump with a header and an include of the map file
            data = _readFile(lang_file)
            _writeFile(lang_file,
                       '## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n'
                       + data
                       + '\n#include "lang_map.hdf"\n')

            # dumping the hdf strings file
            if d_lang == "en":
                map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
            else:
                map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
            _ensureDir(os.path.dirname(map_file))
            map_hdf = neo_util.HDF()
            for (name, s_id) in hdf_map:
                text = hdf.getValue(_tieredName('Lang.Extracted', s_id), '')
                map_hdf.setValue(name, text)
            map_hdf.writeFile(map_file)

    def loadMap(self, file, prefix, lang):
        """Load translations for `lang` from an HDF file into the database.

        file   -- HDF file to read.
        prefix -- HDF node whose children are named by string id and hold
                  the translated text.
        lang   -- language code the translations belong to.
        """
        log("Loading map for language %s" % lang)
        hdf = neo_util.HDF()
        hdf.readFile(file)
        obj = hdf.getChild(prefix)
        updates = 0
        new_r = 0
        while obj is not None:
            s_id = obj.name()
            text = obj.value()

            try:
                map_r = self.tdb.maps.fetchRow(
                    [('string_id', s_id), ('lang', lang)])
            except odb.eNoMatchingRows:
                map_r = self.tdb.maps.newRow()
                map_r.string_id = s_id
                map_r.lang = lang
                new_r = new_r + 1

            # NOTE(review): the recovered listing lost its indentation; the
            # save is assumed to happen only when the text changed -- confirm.
            if map_r.string != text:
                updates = updates + 1
                map_r.string = text
                map_r.save()

            obj = obj.next()
        log("New maps: %d Updates: %d" % (new_r, updates - new_r))


def main(argv):
    """Command-line entry point; returns -1 after printing usage.

    Options: --help, -f FILE (scan one file), -v LEVEL, --load=FILE (load
    translations instead of extracting), --lang=LANG (language for --load,
    default 'en').
    """
    try:
        alist, args = getopt.getopt(argv[1:], "f:v:",
                                    ["help", "load=", "lang="])
    except getopt.GetoptError as err:
        print(str(err))
        usage(argv[0])
        return -1

    one_file = None
    verbose = 0
    load_file = None
    lang = 'en'
    for (field, val) in alist:
        if field == "--help":
            usage(argv[0])
            return -1
        if field == "-f":
            one_file = val
        if field == "-v":
            verbose = int(val)
        if field == "--load":
            load_file = val
        if field == "--lang":
            lang = val

    #signal.signal(signal.SIGTERM, handleSignal)
    #signal.signal(signal.SIGINT, handleSignal)

    log("trans: start")

    try:
        t = Translator()
        if load_file:
            t.loadMap(load_file, 'S', lang)
        else:
            locations = t.loadStrings(one_file, verbose=verbose)
            t.dumpStrings(locations)
            t.dumpFiles(locations, 'hdf')
    except KeyboardInterrupt:
        pass
    except Exception:
        # top-level boundary: report through the project's error handler
        import handle_error
        handle_error.handleException("Translation Error")


if __name__ == "__main__":
    main(sys.argv)