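# trans.py -- translation string extraction and merge tool.
#
# Walks the template tree, pulls user-visible strings out of ClearSilver
# (.cs/.cst), HTML, and HDF files, records them in the translation
# database (db_trans), and writes per-language HDF string maps plus
# rewritten copies of the templates under <root>/gen.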
import sys, string, os, getopt, pwd, signal, time, re
import fcntl

import tstart

import db_trans
from log import *
import neo_cgi, neo_util
import odb

eTransError = "eTransError"
eNoString = "eNoString"

DONE = 0
DEBUG = 0

TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

if not DEBUG: LOGGING_STATUS[DEV_UPDATE] = 0

def handleSignal(*arg):
    global DONE
    DONE = 1

def usage(progname):
    print "usage: %s [-f <file>] [-v <level>] [--load <mapfile>] [--lang <lang>] [--help]" % progname

def exceptionString():
    import StringIO, traceback

    sfp = StringIO.StringIO()
    traceback.print_exc(file=sfp)
    exception = sfp.getvalue()
    sfp.close()

    return exception

class TransLoc:
    def __init__ (self, string_id, filename, location):
        self.string_id = string_id
        self.filename = filename
        self.location = location

class Translator:
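    """Finds translatable strings in ClearSilver templates, HTML, and HDF
    files, stores them in the translation database, and dumps per-language
    HDF maps and rewritten template files."""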
    _HTML_TAG_RE = None
    _HTML_TAG_REGEX = '<[^!][^>]*?>'
    _HTML_CMT_RE = None
    _HTML_CMT_REGEX = '<!--.*?-->'
    _CS_TAG_RE = None
    _CS_TAG_REGEX = '<\\?.+?\\?>'

    def __init__ (self):
        self.tdb = db_trans.trans_connect()

        self.root = "testroot"
        self.languages = ['es', 'en']

        self.ignore_paths = ['tmpl/m']
        self.ignore_files = ['blah_ignore.cs']

        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']

        if self.root is None:
            raise eTransError, "Unable to determine installation root"

        if Translator._HTML_TAG_RE is None:
            Translator._HTML_TAG_RE = re.compile(Translator._HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
        if Translator._HTML_CMT_RE is None:
            Translator._HTML_CMT_RE = re.compile(Translator._HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
        if Translator._CS_TAG_RE is None:
            Translator._CS_TAG_RE = re.compile(Translator._CS_TAG_REGEX, re.MULTILINE | re.DOTALL)

        self._html_state = 0

    def parseHTMLTag(self, data):
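        """Scan the inside of an HTML start tag (the text between '<' and
        '>') and return a list of (string, offset, ishtml) tuples for
        translatable attribute values -- currently only the value attribute
        of <input type=submit> and <input type=button>."""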
        i = 0
        if len(data) == 0: return []
        if data[0] in '/?': return []
        while i < len(data) and data[i] not in ' \n\r\t>': i = i + 1
        if i == len(data): return []
        tag = data[:i].lower()

        results = []
        attrfind = re.compile(
            r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
            r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')
        k = i
        attrs = {}
        attrs_beg = {}
        while k < len(data):
            match = attrfind.match(data, k)
            if not match: break
            attrname, rest, attrvalue = match.group(1, 2, 3)
            if not rest:
                attrvalue = attrname
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            attrname = attrname.lower()
            if attrs.has_key(attrname):
                log("Can't handle duplicate attrs: %s" % attrname)
            attrs[attrname] = attrvalue
            attrs_beg[attrname] = match.start(3)
            k = match.end(0)

        find_l = []
        if tag == "input":
            if attrs.get('type', "").lower() in ["submit", "button"]:
                find_l.append((attrs.get('value', ''), attrs_beg.get('value', 0)))

        for s, k in find_l:
            if s:
                x = data[k:].find(s)
                if x != -1: results.append((s, x + k, 1))

        return results

    def parseHTML(self, data, reset=1):
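        """Walk through HTML data and return (string, offset, ishtml)
        tuples for the translatable text: the text between tags, plus any
        strings parseHTMLTag() finds inside tags.  Unterminated tags and
        comments are tracked in self._html_state so parsing can continue
        across chunks when reset=0."""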
        if reset: self._html_state = 0
        if DEBUG: print "- %d ---------\n%s\n- E ---------" % (self._html_state, data)

        results = []
        i = 0
        n = len(data)

        if self._html_state:
            # finish a tag or comment left open by the previous chunk
            if self._html_state == 2:
                x = string.find(data[i:], '-->')
                l = 3
            else:
                x = string.find(data[i:], '>')
                l = 1
            if x == -1: return results
            i = i + x + l
            self._html_state = 0
        while i < n:
            if DEBUG: print "MATCHING>%s<MATCHING" % data[i:]
            cmt_b = string.find(data[i:], '<!--')
            cmt_e = string.find(data[i:], '-->')
            tag_b = string.find(data[i:], '<')
            tag_e = string.find(data[i:], '>')
            if DEBUG: print "B> %d %d %d %d <B" % (cmt_b, cmt_e, tag_b, tag_e)
            if cmt_b != -1 and cmt_b <= tag_b:
                # text before a comment
                x = i
                y = i + cmt_b - 1
                while x < y and data[x] in string.whitespace: x += 1
                while y > x and data[y] in string.whitespace: y -= 1
                results.append((data[x:y+1], x, 1))
                if cmt_e == -1:
                    self._html_state = 2
                    break
                i = i + cmt_e + 3
            elif tag_b != -1:
                # text before a tag
                x = i
                y = i + tag_b - 1
                while x < y and data[x] in string.whitespace: x += 1
                while y > x and data[y] in string.whitespace: y -= 1
                results.append((data[x:y+1], x, 1))
                if tag_e == -1:
                    self._html_state = 1
                    break
                h_results = self.parseHTMLTag(data[i+tag_b+1:i+tag_e])
                h_results = map(lambda x: (x[0], x[1] + i + tag_b + 1, x[2]), h_results)
                results = results + h_results
                i = i + tag_e + 1
            else:
                # trailing text with no more tags
                x = i
                y = n - 1
                while x < y and data[x] in string.whitespace: x += 1
                while y > x and data[y] in string.whitespace: y -= 1
                results.append((data[x:y+1], x, 1))
                break
        return results

    def parseCS(self, data):
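        """Split ClearSilver template data on <?cs ... ?> tags and run
        parseHTML() over the non-ClearSilver pieces, keeping HTML state
        across the pieces.  Returns (string, offset, ishtml) tuples with
        offsets relative to the original data."""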
        results = []
        i = 0
        n = len(data)
        while i < n:
            m = Translator._CS_TAG_RE.search(data, i)
            if not m:
                # no more complete CS tags; keep everything up to a
                # trailing unterminated '<?' if there is one
                x = string.find(data[i:], '<?')
                if x == -1:
                    results.append((data[i:], i))
                else:
                    results.append((data[i:i+x], i))
                break
            (b, e) = m.span()
            if i != b: results.append((data[i:b], i))
            i = e
        t_results = []
        self._html_state = 0
        for (s, ofs) in results:
            r = self.parseHTML(s, reset=0)
            r = map(lambda x: (x[0], x[1] + ofs, x[2]), r)
            t_results = t_results + r
        return t_results

    def descendHDF(self, obj, prefix):
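        """Recursively walk an HDF tree and return (value, full.hdf.name, 0)
        tuples for every node that has a value and a Lang attribute."""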
        results = []
        while obj is not None:
            if obj.value():
                attrs = obj.attrs()
                attrs = map(lambda x: x[0], attrs)
                if "Lang" in attrs:
                    if prefix:
                        results.append((obj.value(), "%s.%s" % (prefix, obj.name()), 0))
                    else:
                        results.append((obj.value(), "%s" % (obj.name()), 0))
            if obj.child():
                if prefix:
                    results = results + self.descendHDF(obj.child(), "%s.%s" % (prefix, obj.name()))
                else:
                    results = results + self.descendHDF(obj.child(), (obj.name()))
            obj = obj.next()
        return results

    def parseHDF(self, data):
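        """Parse HDF file data and return the translatable values found by
        descendHDF()."""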
        hdf = neo_util.HDF()
        hdf.readString(data, 1)
        return self.descendHDF(hdf, "")

    def handleFile(self, file):
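        """Extract strings from a single file (path relative to self.root),
        dispatching on extension: .cs/.cst, .html/.htm, or .hdf.  Returns a
        list of (string, offset-or-hdf-name, ishtml) tuples."""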
        if file in self.ignore_files: return []
        for a_re in self.ignore_patterns:
            if re.match(a_re, file):
                return []
        fpath = self.root + '/' + file
        x = string.rfind(file, '.')
        if x == -1: return []
        data = open(fpath, 'r').read()
        ext = file[x:]
        strings = []
        if ext in ['.cst', '.cs']:
            strings = self.parseCS(data)
        elif ext in ['.html', '.htm']:
            strings = self.parseHTML(data)
        elif ext in ['.hdf']:
            strings = self.parseHDF(data)
        if len(strings):
            print "Found %d strings in %s" % (len(strings), file)
            return strings
        return []

    def walkDirectory(self, path):
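        """Recursively collect strings from every file under
        self.root/path, skipping ignored paths, dotfiles, and "release"
        directories.  Returns a list of (filename, strings) pairs."""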
        if path in self.ignore_paths: return []
        fpath = self.root + '/' + path
        files = os.listdir(fpath)
        dirs = []
        results = []
        for file in files:
            if file[0] == '.': continue
            fname = fpath + '/' + file
            if os.path.isdir(fname):
                dirs.append(file)
            else:
                strings = self.handleFile(path + '/' + file)
                if len(strings):
                    results.append((path + '/' + file, strings))
        for dir in dirs:
            if dir not in ["release"]:
                results = results + self.walkDirectory(path + '/' + dir)
        return results

    def cleanHtmlString(self, s):
        # collapse runs of whitespace and strip leading/trailing whitespace
        s = re.sub(r"\s+", " ", s)
        return string.strip(s)

    def containsWords(self, s, ishtml):
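        """Return 1 if s contains at least one ASCII letter or digit (after
        decoding a few common entities when ishtml is true), i.e. if it is
        worth translating."""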
        if ishtml:
            s = string.replace(s, '&nbsp;', ' ')
            s = string.replace(s, '&quot;', '"')
            s = string.replace(s, '&copy;', '')
            s = string.replace(s, '&lt;', '<')
            s = string.replace(s, '&gt;', '>')
            s = string.replace(s, '&amp;', '&')
        for x in range(len(s)):
            n = ord(s[x])
            if (n > 47 and n < 58) or (n > 64 and n < 91) or (n > 96 and n < 123): return 1
        return 0

    def findString(self, s):
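        """Return the string_id for s, inserting a new row in the strings
        table if it isn't there yet.  Raises eTransError if the string is
        present more than once."""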
        rows = self.tdb.strings.fetchRows( ('string', s) )
        if len(rows) == 0:
            row = self.tdb.strings.newRow()
            row.string = s
            row.save()
            return row.string_id
        elif len(rows) > 1:
            raise eTransError, "String %s exists multiple times!" % s
        else:
            return rows[0].string_id

    def loadStrings(self, one_file=None, verbose=0):
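        """Extract strings from one file (or the whole tmpl tree), write a
        gettext-style "map" file for reference, make sure each unique string
        has a row in the database, and return a list of TransLoc objects
        recording where each string was found."""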
        if one_file is not None:
            strings = self.handleFile(one_file)
            results = [(one_file, strings)]
        else:
            results = self.walkDirectory('tmpl')
        uniq = {}
        cnt = 0
        seen_hdf = {}
        for fname, strings in results:
            for (s, ofs, ishtml) in strings:
                if s and string.strip(s):
                    l = len(s)
                    if ishtml:
                        s = self.cleanHtmlString(s)
                    if self.containsWords(s, ishtml):
                        if type(ofs) == type(""):
                            # HDF strings are keyed by name, not offset
                            if seen_hdf.has_key(ofs):
                                if seen_hdf[ofs][0] != s:
                                    log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
                            else:
                                seen_hdf[ofs] = (s, fname)
                        try:
                            uniq[s].append((fname, ofs, l))
                        except KeyError:
                            uniq[s] = [(fname, ofs, l)]
                        cnt = cnt + 1
        print "%d strings, %d unique" % (cnt, len(uniq.keys()))
        fp = open("map", 'w')
        for (s, locs) in uniq.items():
            locs = map(lambda x: "%s:%s:%d" % x, locs)
            fp.write('#: %s\n' % (string.join(locs, ',')))
            fp.write('msgid=%s\n\n' % repr(s))
        fp.close()

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in uniq.items():
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if type(ofs) == type(""):
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                loc_r = TransLoc(s_id, fname, location)
                locations.append(loc_r)
        return locations

    def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
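        """Build an HDF tree of the strings referenced by locations for one
        language, keyed under prefix.  With exist=1, include only strings
        that have no translation for that language; with tiered=1, nest the
        keys as prefix.<id/TIER1_DIV>.<id/TIER2_DIV>.<id>."""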
        hdf = neo_util.HDF()
        if exist and lang == 'en': return hdf
        done = {}
        locations.sort()
        maps = self.tdb.maps.fetchRows( ('lang', lang) )
        maps_d = {}
        for m in maps:
            maps_d[int(m.string_id)] = m
        strings = self.tdb.strings.fetchRows()
        strings_d = {}
        for s in strings:
            strings_d[int(s.string_id)] = s
        count = 0
        for loc in locations:
            s_id = int(loc.string_id)
            if done.has_key(s_id): continue
            try:
                s_row = maps_d[s_id]
                if exist: continue
            except KeyError:
                try:
                    s_row = strings_d[s_id]
                except KeyError:
                    log("Missing string_id %d, skipping" % s_id)
                    continue
            count = count + 1
            if tiered:
                hdf.setValue("%s.%d.%d.%s" % (prefix, int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), s_row.string)
            else:
                hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
            done[s_id] = 1
        if exist == 1: log("Missing %d strings for lang %s" % (count, lang))
        return hdf

    def dumpStrings(self, locations, lang=None):
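        """Write strings_<lang>.hdf for each language (or the given one),
        plus strings_missing_<lang>.hdf listing strings that still lack a
        translation."""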
        log("Dumping strings to HDF")
        if lang is None:
            langs = ['en']
            sql = "select lang from nt_trans_maps group by lang"
            cursor = self.tdb.defaultCursor()
            cursor.execute(sql)
            rows = cursor.fetchall()
            for row in rows:
                langs.append(row[0])
        else:
            langs = [lang]

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang)
            hdf.writeFile("strings_%s.hdf" % a_lang)

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
            if hdf.child():
                hdf.writeFile("strings_missing_%s.hdf" % a_lang)

    def fetchString(self, s_id, lang):
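        """Return the text for string_id in the given language, falling back
        to the English source string when no translation exists.  When lang
        is "hdf", return a <?cs var: ?> reference into Lang.Extracted
        instead of literal text."""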
        if lang == "hdf":
            return "<?cs var:Lang.Extracted.%d.%d.%s ?>" % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id)
        rows = self.tdb.maps.fetchRows( [('string_id', s_id), ('lang', lang)] )
        if len(rows) == 0:
            try:
                row = self.tdb.strings.fetchRow( ('string_id', s_id) )
            except odb.eNoMatchingRows:
                log("Unable to find string id %s" % s_id)
                raise eNoString
            if lang != 'en':
                log("Untranslated string for id %s" % s_id)
            return row.string
        else:
            return rows[0].string

    def dumpFiles(self, locations, lang):
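        """Rewrite every file that contains extracted strings into
        <root>/gen, replacing each string with its translation (or with a
        <?cs var: ?> reference when lang is "hdf"), and write the
        lang_<lang>.hdf / lang_map.hdf files that the templates include."""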
        log("Dumping files for %s" % lang)
        files = {}
        for row in locations:
            try:
                files[row.filename].append(row)
            except KeyError:
                files[row.filename] = [row]

        hdf_map = []

        os.system("rm -rf %s/gen/tmpl" % (self.root))
        for file in files.keys():
            fname = "%s/gen/%s" % (self.root, file)
            try:
                os.makedirs(os.path.dirname(fname))
            except OSError, reason:
                if reason[0] != 17:   # ignore EEXIST
                    raise
            do_hdf = 0
            x = string.rfind(file, '.')
            if x != -1 and file[x:] == '.hdf':
                do_hdf = 1
            ofs = []
            for loc in files[file]:
                parts = string.split(loc.location, ':')
                if len(parts) == 3 and parts[0] == 'ofs' and do_hdf == 0:
                    ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and do_hdf == 1:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    log("Invalid location for string_id %s" % loc.string_id)
                    continue
            if not do_hdf:
                ofs.sort()
                data = open(self.root + '/' + file).read()

                x = 0
                n = len(data)
                out = []

                while len(ofs):
                    if ofs[0][0] > x:
                        out.append(data[x:ofs[0][0]])
                        x = ofs[0][0]
                    elif ofs[0][0] == x:
                        out.append(self.fetchString(ofs[0][2], lang))
                        x = ofs[0][0] + ofs[0][1]
                        ofs = ofs[1:]
                    else:
                        log("How did we get here? %s x=%d ofs=%d sid=%d" % (file, x, ofs[0][0], ofs[0][2]))
                        log("Data[x:x+20]: %s" % data[x:x+20])
                        log("Data[ofs:ofs+20]: %s" % data[ofs[0][0]:ofs[0][0]+20])
                        break
                if n > x:
                    out.append(data[x:])
                odata = string.join(out, '')
                open(fname, 'w').write(odata)

        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
            hdf = self.stringsHDF('Lang.Extracted', locations, d_lang, tiered=1)
            fname = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
            hdf.writeFile(fname)
            data = open(fname).read()
            fp = open(fname, 'w')
            fp.write('## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n')
            fp.write(data)
            fp.write('\n#include "lang_map.hdf"\n')
            fp.close()

            if d_lang == "en":
                map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
            else:
                map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
            try:
                os.makedirs(os.path.dirname(map_file))
            except OSError, reason:
                if reason[0] != 17: raise   # ignore EEXIST
            map_hdf = neo_util.HDF()
            for (name, s_id) in hdf_map:
                s = hdf.getValue('Lang.Extracted.%d.%d.%s' % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), '')
                map_hdf.setValue(name, s)
            map_hdf.writeFile(map_file)

    def loadMap(self, file, prefix, lang):
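        """Load translated strings for one language from an HDF file (in
        the same S.<string_id> layout that dumpStrings writes) back into
        the maps table, creating or updating rows as needed."""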
        log("Loading map for language %s" % lang)
        hdf = neo_util.HDF()
        hdf.readFile(file)
        obj = hdf.getChild(prefix)
        updates = 0
        new_r = 0
        while obj is not None:
            s_id = obj.name()
            value = obj.value()

            try:
                map_r = self.tdb.maps.fetchRow( [('string_id', s_id), ('lang', lang)] )
            except odb.eNoMatchingRows:
                map_r = self.tdb.maps.newRow()
                map_r.string_id = s_id
                map_r.lang = lang
                new_r = new_r + 1

            if map_r.string != value:
                updates = updates + 1
                map_r.string = value
                map_r.save()

            obj = obj.next()
        log("New maps: %d Updates: %d" % (new_r, updates - new_r))


def main(argv):
    alist, args = getopt.getopt(argv[1:], "f:v:", ["help", "load=", "lang="])

    one_file = None
    verbose = 0
    load_file = None
    lang = 'en'
    for (field, val) in alist:
        if field == "--help":
            usage(argv[0])
            return -1
        if field == "-f":
            one_file = val
        if field == "-v":
            verbose = int(val)
        if field == "--load":
            load_file = val
        if field == "--lang":
            lang = val

    global DONE

    log("trans: start")

    start_time = time.time()

    try:
        t = Translator()
        if load_file:
            t.loadMap(load_file, 'S', lang)
        else:
            locations = t.loadStrings(one_file, verbose=verbose)
            t.dumpStrings(locations)
            t.dumpFiles(locations, 'hdf')
    except KeyboardInterrupt:
        pass
    except:
        import handle_error
        handle_error.handleException("Translation Error")

if __name__ == "__main__":
    main(sys.argv)