3 import sys, string, os, getopt, pwd, signal, time, re
10 import neo_cgi, neo_util
13 eTransError =
"eTransError" 19 TIER1_DIV = 11 * TIER2_DIV
21 if not DEBUG: LOGGING_STATUS[DEV_UPDATE] = 0
31 import StringIO, traceback
34 sfp = StringIO.StringIO()
35 traceback.print_exc(file=sfp)
36 exception = sfp.getvalue()
def __init__ (self, string_id, filename, location):
    """Container for one location where a translatable string was found.

    string_id -- database id of the string (int)
    filename  -- file, relative to the installation root, the string came from
    location  -- encoded position: "hdf:<name>" for HDF values or
                 "ofs:<offset>:<length>" for byte ranges (see dumpStrings)
    """
    self.string_id = string_id
    self.filename = filename
    self.location = location
49 _HTML_TAG_REGEX =
'<[^!][^>]*?>' 51 _HTML_CMT_REGEX =
'<!--.*?-->' 53 _CS_TAG_REGEX =
'<\\?.+?\\?>' 56 self.
tdb = db_trans.trans_connect()
74 raise "Unable to determine installation root" 77 if Translator._HTML_TAG_RE
is None:
78 Translator._HTML_TAG_RE = re.compile(Translator._HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
79 if Translator._HTML_CMT_RE
is None:
80 Translator._HTML_CMT_RE = re.compile(Translator._HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
81 if Translator._CS_TAG_RE
is None:
82 Translator._CS_TAG_RE = re.compile(Translator._CS_TAG_REGEX, re.MULTILINE | re.DOTALL)
90 if len(data) == 0:
return []
91 if data[0]
in '/?':
return []
92 while i < len(data)
and data[i]
not in ' \n\r\t>': i = i + 1
93 if i == len(data):
return []
94 tag = data[:i].lower()
98 attrfind = re.compile(
99 r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*' 100 r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')
105 match = attrfind.match(data, k)
107 attrname, rest, attrvalue = match.group(1, 2, 3)
110 elif attrvalue[:1] ==
'\'' == attrvalue[-1:]
or \
111 attrvalue[:1] ==
'"' == attrvalue[-1:]:
112 attrvalue = attrvalue[1:-1]
113 attrname = attrname.lower()
114 if attrs.has_key(attrname):
115 log(
"Can't handle duplicate attrs: %s" % attrname)
116 attrs[attrname] = attrvalue
117 attrs_beg[attrname] = match.start(3)
122 if attrs.get(
'type',
"").lower()
in [
"submit",
"button"]:
123 find_l.append((attrs.get(
'value',
''), attrs_beg.get(
'value', 0)))
128 if x != -1: results.append((s, x+k, 1))
134 if DEBUG:
print "- %d ---------\n%s\n- E ---------" % (self.
_html_state, data)
142 x = string.find(data[i:],
'-->')
145 x = string.find(data[i:],
'>')
147 if x == -1:
return results
151 if DEBUG:
print "MATCHING>%s<MATCHING" % data[i:]
152 cmt_b = string.find(data[i:],
'<!--')
153 cmt_e = string.find(data[i:],
'-->')
154 tag_b = string.find(data[i:],
'<')
155 tag_e = string.find(data[i:],
'>')
156 if DEBUG:
print "B> %d %d %d %d <B" % (cmt_b, cmt_e, tag_b, tag_e)
157 if cmt_b != -1
and cmt_b <= tag_b:
160 while x < y
and data[x]
in string.whitespace: x+=1
161 while y > x
and data[y]
in string.whitespace: y-=1
162 results.append((data[x:y+1], x, 1))
170 while x < y
and data[x]
in string.whitespace: x+=1
171 while y > x
and data[y]
in string.whitespace: y-=1
172 results.append((data[x:y+1], x, 1))
177 h_results = map(
lambda x: (x[0], x[1] + i+tag_b+1, x[2]), h_results)
178 results = results + h_results
183 while x < y
and data[x]
in string.whitespace: x+=1
184 while y > x
and data[y]
in string.whitespace: y-=1
185 results.append((data[x:y+1], x, 1))
194 m = Translator._CS_TAG_RE.search(data, i)
197 x = string.find(data[i:],
'<?')
199 results.append((data[i:], i))
201 results.append((data[i:x], i))
204 if i != b: results.append((data[i:b], i))
208 for (s, ofs)
in results:
210 r = map(
lambda x: (x[0], x[1] + ofs, x[2]), r)
211 t_results = t_results + r
216 while obj
is not None:
219 attrs = map(
lambda x: x[0], attrs)
222 results.append((obj.value(),
"%s.%s" % (prefix, obj.name()), 0))
224 results.append((obj.value(),
"%s" % (obj.name()), 0))
227 results = results + self.
descendHDF(obj.child(),
"%s.%s" % (prefix, obj.name()))
229 results = results + self.
descendHDF(obj.child(), (obj.name()))
237 hdf.readString(data, 1)
243 if re.match(a_re,file):
245 fpath = self.
root +
'/' + file
246 x = string.rfind(file,
'.')
247 if x == -1:
return []
248 data = open(fpath,
'r').read() 251 if ext
in [
'.cst',
'.cs']:
253 elif ext
in [
'.html',
'.htm']:
255 elif ext
in [
'.hdf']:
258 print "Found %d strings in %s" % (len(strings), file)
264 fpath = self.
root +
'/' + path
265 files = os.listdir(fpath)
269 if file[0] ==
'.':
continue 270 fname = fpath +
'/' + file
271 if os.path.isdir(fname):
276 results.append((path +
'/' + file, strings))
278 if dir
not in [
"release"]:
def cleanHtmlString(self, s):
    """Normalize extracted HTML text: collapse each run of whitespace
    (spaces, tabs, newlines from the markup) to a single space and strip
    leading/trailing whitespace.
    """
    # raw string avoids the invalid "\s" escape in a plain literal
    s = re.sub(r"\s+", " ", s)
    # str.strip() replaces the deprecated string.strip() module function;
    # behavior is identical for str inputs
    return s.strip()
def containsWords(self, s, ishtml):
    """Return 1 if *s* contains any ASCII letter or digit, else 0.

    When *ishtml* is true, a handful of common HTML entities are first
    replaced with their character equivalents (or removed) so that
    markup-only strings such as "&nbsp;" or "&copy;" are not mistaken
    for translatable words.

    NOTE(review): the entity literals below were HTML-decoded by the
    extraction that produced this view; they are restored here to the
    '&name;' forms the replacement only makes sense with -- confirm
    against the original file.
    """
    if ishtml:
        # str.replace supersedes the deprecated string.replace() function
        s = s.replace('&nbsp;', ' ')
        s = s.replace('&quot;', '"')
        s = s.replace('&copy;', '')
        s = s.replace('&lt;', '<')
        s = s.replace('&gt;', '>')
        s = s.replace('&amp;', '&')
    for ch in s:
        n = ord(ch)
        # 48-57 digits, 65-90 uppercase, 97-122 lowercase
        if (n > 47 and n < 58) or (n > 64 and n < 91) or (n > 96 and n < 123):
            return 1
    # explicit falsy result instead of falling off the end (returns None)
    return 0
300 rows = self.tdb.strings.fetchRows( (
'string', s) )
302 row = self.tdb.strings.newRow()
307 raise eTransError,
"String %s exists multiple times!" % s
309 return rows[0].string_id
312 if one_file
is not None:
314 results = [(one_file, strings)]
320 for fname, strings
in results:
321 for (s, ofs, ishtml)
in strings:
322 if s
and string.strip(s):
327 if type(ofs) == type(
""):
328 if seen_hdf.has_key(ofs):
329 if seen_hdf[ofs][0] != s:
330 log(
"Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
332 seen_hdf[ofs] = (s, fname)
334 uniq[s].append((fname, ofs, l))
336 uniq[s] = [(fname, ofs, l)]
338 print "%d strings, %d unique" % (cnt, len(uniq.keys()))
339 fp = open(
"map",
'w')
340 for (s, locs)
in uniq.items():
341 locs = map(
lambda x:
"%s:%s:%d" % x, locs)
342 fp.write(
'#: %s\n' % (string.join(locs,
',')))
343 fp.write(
'msgid=%s\n\n' % repr(s))
345 log(
"Loading strings/locations into database")
347 for (s, locs)
in uniq.items():
349 for (fname, ofs, l)
in locs:
350 if type(ofs) == type(
""):
351 location =
"hdf:%s" % ofs
353 location =
"ofs:%d:%d" % (ofs, l)
354 loc_r =
TransLoc(s_id, fname, location)
355 locations.append(loc_r)
358 def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
360 if exist
and lang ==
'en':
return hdf
363 maps = self.tdb.maps.fetchRows( (
'lang', lang) )
366 maps_d[int(map.string_id)] = map
367 strings = self.tdb.strings.fetchRows()
369 for string
in strings:
370 strings_d[int(string.string_id)] = string
372 for loc
in locations:
373 s_id = int(loc.string_id)
374 if done.has_key(s_id):
continue 380 s_row = strings_d[s_id]
382 log(
"Missing string_id %d, skipping" % s_id)
386 hdf.setValue(
"%s.%d.%d.%s" % (prefix, int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), s_row.string)
388 hdf.setValue(
"%s.%s" % (prefix, s_id), s_row.string)
390 if exist == 1:
log(
"Missing %d strings for lang %s" % (count, lang))
394 log(
"Dumping strings to HDF")
397 sql =
"select lang from nt_trans_maps group by lang" 398 cursor = self.tdb.defaultCursor()
400 rows = cursor.fetchall()
408 hdf.writeFile(
"strings_%s.hdf" % a_lang)
411 hdf = self.
stringsHDF(
'S', locations, a_lang, exist=1)
413 hdf.writeFile(
"strings_missing_%s.hdf" % a_lang)
417 return "<?cs var:Lang.Extracted.%d.%d.%s ?>" % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id)
418 rows = self.tdb.maps.fetchRows( [(
'string_id', s_id), (
'lang', lang)] )
421 row = self.tdb.strings.fetchRow( (
'string_id', s_id) )
423 log(
"Unable to find string id %s" % s_id)
426 log(
"Untranslated string for id %s" % s_id)
429 return rows[0].string
432 log(
"Dumping files for %s" % lang)
434 for row
in locations:
436 files[row.filename].append(row)
438 files[row.filename] = [row]
442 os.system(
"rm -rf %s/gen/tmpl" % (self.
root))
443 for file
in files.keys():
444 fname =
"%s/gen/%s" % (self.
root, file)
446 os.makedirs(os.path.dirname(fname))
447 except OSError, reason:
451 x = string.rfind(file,
'.')
452 if x != -1
and file[x:] ==
'.hdf':
455 for loc
in files[file]:
456 parts = string.split(loc.location,
':')
457 if len(parts) == 3
and parts[0] ==
'ofs' and do_hdf == 0:
458 ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
459 elif len(parts) == 2
and parts[0] ==
'hdf' and do_hdf == 1:
460 hdf_map.append((parts[1], loc.string_id))
462 log(
"Invalid location for loc_id %s" % loc.loc_id)
466 data = open(self.
root +
'/' + file).read()
474 out.append(data[x:ofs[0][0]])
478 x = ofs[0][0] + ofs[0][1]
481 log(
"How did we get here? %s x=%d ofs=%d sid=%d" % (file, x, ofs[0][0], ofs[0][2]))
482 log(
"Data[x:20]: %s" % data[x:20])
483 log(
"Data[ofs:20]: %s" % data[ofs[0][0]:20])
487 odata = string.join(out,
'')
488 open(fname,
'w').write(odata)
497 hdf = self.
stringsHDF(
'Lang.Extracted', locations, d_lang, tiered=1)
498 fname =
"%s/gen/tmpl/lang_%s.hdf" % (self.
root, d_lang)
500 data = open(fname).read()
501 fp = open(fname,
'w')
502 fp.write(
'## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n')
504 fp.write(
'\n#include "lang_map.hdf"\n')
508 map_file =
"%s/gen/tmpl/lang_map.hdf" % (self.
root)
510 map_file =
"%s/gen/tmpl/%s/lang_map.hdf" % (self.
root, d_lang)
512 os.makedirs(os.path.dirname(map_file))
513 except OSError, reason:
514 if reason[0] != 17:
raise 515 map_hdf = neo_util.HDF()
516 for (name, s_id)
in hdf_map:
517 str = hdf.getValue(
'Lang.Extracted.%d.%d.%s' % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id),
'')
518 map_hdf.setValue(name, str)
519 map_hdf.writeFile(map_file)
522 log(
"Loading map for language %s" % lang)
525 obj = hdf.getChild(prefix)
528 while obj
is not None:
533 map_r = self.tdb.maps.fetchRow( [(
'string_id', s_id), (
'lang', lang)])
535 map_r = self.tdb.maps.newRow()
536 map_r.string_id = s_id
540 if map_r.string != str:
541 updates = updates + 1
546 log(
"New maps: %d Updates: %d" % (new_r, updates - new_r))
550 alist, args = getopt.getopt(argv[1:],
"f:v:", [
"help",
"load=",
"lang="])
556 for (field, val)
in alist:
557 if field ==
"--help":
564 if field ==
"--load":
566 if field ==
"--lang":
577 start_time = time.time()
582 t.loadMap(load_file,
'S', lang)
584 locations = t.loadStrings(one_file, verbose=verbose)
585 t.dumpStrings(locations)
586 t.dumpFiles(locations,
'hdf')
587 except KeyboardInterrupt:
591 handle_error.handleException(
"Translation Error")
593 if __name__ ==
"__main__":
def __init__(self, string_id, filename, location)
def loadStrings(self, one_file=None, verbose=0)
def loadMap(self, file, prefix, lang)
def cleanHtmlString(self, s)
def parseHTMLTag(self, data)
def walkDirectory(self, path)
def descendHDF(self, obj, prefix)
def dumpStrings(self, locations, lang=None)
def handleFile(self, file)
def containsWords(self, s, ishtml)
def fetchString(self, s_id, lang)
def parseHTML(self, data, reset=1)
def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0)
def dumpFiles(self, locations, lang)