trans.py
Go to the documentation of this file.
1 #!/neo/opt/bin/python
2 
3 import sys, string, os, getopt, pwd, signal, time, re
4 import fcntl
5 
6 import tstart
7 
8 import db_trans
9 from log import *
10 import neo_cgi, neo_util
11 import odb
12 
# Old-style Python 2 string exception raised by Translator on fatal errors.
eTransError = "eTransError"

DONE = 0    # set to 1 by handleSignal() to request a graceful stop
DEBUG = 0   # enables verbose parser tracing (print statements)

# String ids are bucketed into a two-level HDF hierarchy:
#   Lang.Extracted.<id/TIER1_DIV>.<id/TIER2_DIV>.<id>
TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

# LOGGING_STATUS / DEV_UPDATE come from `from log import *`; silence
# dev-update log spam unless debugging.
if not DEBUG: LOGGING_STATUS[DEV_UPDATE] = 0
22 
def handleSignal(*arg):
    """Signal handler: flag the main loop to terminate gracefully."""
    global DONE
    DONE = 1
26 
27 def usage():
28  print "usage info!!"
29 
def exceptionString():
    """Return the currently-handled exception's traceback as a string.

    NOTE(review): the `def` line was absent from this dump (original
    numbering jumps from 28 to 31); name reconstructed from the
    pyclearsilver sources — confirm against upstream.
    """
    import StringIO, traceback

    ## get the traceback message
    sfp = StringIO.StringIO()
    traceback.print_exc(file=sfp)
    exception = sfp.getvalue()
    sfp.close()

    return exception
40 
class TransLoc:
    """Record tying a string id to the file and position it was found at.

    location is either "ofs:<offset>:<length>" or "hdf:<name>" (see
    Translator.loadStrings).
    """
    def __init__ (self, string_id, filename, location):
        self.string_id = string_id
        self.filename = filename
        self.location = location
46 
class Translator:
    """Extracts translatable strings from ClearSilver/HTML/HDF templates,
    stores them in the translation database, and regenerates localized
    template trees under <root>/gen."""

    # Class-level regex cache, compiled lazily in __init__.
    _HTML_TAG_RE = None
    _HTML_TAG_REGEX = '<[^!][^>]*?>'   # any tag except <!-- comments
    _HTML_CMT_RE = None
    _HTML_CMT_REGEX = '<!--.*?-->'     # HTML comment
    _CS_TAG_RE = None
    _CS_TAG_REGEX = '<\\?.+?\\?>'      # ClearSilver <? ... ?> tag
54 
    def __init__ (self):
        """Connect to the translation DB and set up (hardcoded) config."""
        self.tdb = db_trans.trans_connect()

        # configuration data ......
        # - we should stop hardcoding this... - jeske

        self.root = "testroot"
        self.languages = ['es', 'en']

        self.ignore_paths = ['tmpl/m']          # common place for mockups
        self.ignore_files = ['blah_ignore.cs']  # ignore clearsilver file

        # ignore clearsilver javascript files
        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']

        # ............................

        # NOTE(review): unreachable while self.root is hardcoded above;
        # kept as a guard for when root detection is implemented.
        if self.root is None:
            raise "Unable to determine installation root"

        # Compile the shared regexes once per process (class-level cache).
        if Translator._HTML_TAG_RE is None:
            Translator._HTML_TAG_RE = re.compile(Translator._HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
        if Translator._HTML_CMT_RE is None:
            Translator._HTML_CMT_RE = re.compile(Translator._HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
        if Translator._CS_TAG_RE is None:
            Translator._CS_TAG_RE = re.compile(Translator._CS_TAG_REGEX, re.MULTILINE | re.DOTALL)

        # parseHTML() carry-over state between chunks:
        # 0 = none, 1 = inside a tag, 2 = inside a comment.
        self._html_state = 0
85 
86 
87  def parseHTMLTag(self, data):
88  # this is only called if we see a full tag in one parse...
89  i = 0
90  if len(data) == 0: return []
91  if data[0] in '/?': return []
92  while i < len(data) and data[i] not in ' \n\r\t>': i = i + 1
93  if i == len(data): return []
94  tag = data[:i].lower()
95  #print "Searching tag: %s" % data
96  #print "Found tag: %s" % tag
97  results = []
98  attrfind = re.compile(
99  r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
100  r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')
101  k = i
102  attrs = {}
103  attrs_beg = {}
104  while k < len(data):
105  match = attrfind.match(data, k)
106  if not match: break
107  attrname, rest, attrvalue = match.group(1, 2, 3)
108  if not rest:
109  attrvalue = attrname
110  elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
111  attrvalue[:1] == '"' == attrvalue[-1:]:
112  attrvalue = attrvalue[1:-1]
113  attrname = attrname.lower()
114  if attrs.has_key(attrname):
115  log("Can't handle duplicate attrs: %s" % attrname)
116  attrs[attrname] = attrvalue
117  attrs_beg[attrname] = match.start(3)
118  k = match.end(0)
119 
120  find_l = []
121  if tag == "input":
122  if attrs.get('type', "").lower() in ["submit", "button"]:
123  find_l.append((attrs.get('value', ''), attrs_beg.get('value', 0)))
124 
125  for s,k in find_l:
126  if s:
127  x = data[k:].find(s)
128  if x != -1: results.append((s, x+k, 1))
129 
130  return results
131 
    def parseHTML(self, data, reset=1):
        """Split HTML text into translatable chunks.

        Returns (string, offset, is_html=1) tuples for text between
        tags/comments, plus attribute strings via parseHTMLTag().  With
        reset=0, resumes from self._html_state left by a previous chunk
        (1 = mid-tag, 2 = mid-comment), so parseCS() can feed segments.
        """
        if reset: self._html_state = 0
        if DEBUG: print "- %d ---------\n%s\n- E ---------" % (self._html_state, data)

        results = []
        i = 0
        n = len(data)
        # if we had state from the last parse... skip past the end of the
        # construct we were inside
        if self._html_state:
            if self._html_state == 2:
                x = string.find(data[i:], '-->')
                l = 3
            else:
                x = string.find(data[i:], '>')
                l = 1
            if x == -1: return results
            i = i + x + l
            self._html_state = 0
        while i < n:
            if DEBUG: print "MATCHING>%s<MATCHING" % data[i:]
            # Offsets of the next comment/tag boundaries, relative to i.
            cmt_b = string.find(data[i:], '<!--')
            cmt_e = string.find(data[i:], '-->')
            tag_b = string.find(data[i:], '<')
            tag_e = string.find(data[i:], '>')
            if DEBUG: print "B> %d %d %d %d <B" % (cmt_b, cmt_e, tag_b, tag_e)
            if cmt_b != -1 and cmt_b <= tag_b:
                # Text before a comment: trim surrounding whitespace.
                x = i
                y = i+cmt_b-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                if cmt_e == -1: # partial comment:
                    self._html_state = 2
                    break
                i = i + cmt_e + 3
            elif tag_b != -1:
                # Text before a tag: trim surrounding whitespace.
                x = i
                y = i+tag_b-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                if tag_e == -1: # partial tag
                    self._html_state = 1
                    break
                # Harvest attribute strings from the complete tag, shifting
                # their offsets from tag-relative to absolute.
                h_results = self.parseHTMLTag(data[i+tag_b+1:i+tag_e])
                h_results = map(lambda x: (x[0], x[1] + i+tag_b+1, x[2]), h_results)
                results = results + h_results
                i = i + tag_e + 1
            else:
                # Trailing text with no more markup.
                x = i
                y = n-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                break
        return results
188 
189  def parseCS(self, data):
190  results = []
191  i = 0
192  n = len(data)
193  while i < n:
194  m = Translator._CS_TAG_RE.search(data, i)
195  if not m:
196  # search for a partial...
197  x = string.find(data[i:], '<?')
198  if x == -1:
199  results.append((data[i:], i))
200  else:
201  results.append((data[i:x], i))
202  break
203  (b, e) = m.span()
204  if i != b: results.append((data[i:b], i))
205  i = e
206  t_results = []
207  self._html_in = 0
208  for (s, ofs) in results:
209  r = self.parseHTML(s, reset=0)
210  r = map(lambda x: (x[0], x[1] + ofs, x[2]), r)
211  t_results = t_results + r
212  return t_results
213 
214  def descendHDF(self, obj, prefix):
215  results = []
216  while obj is not None:
217  if obj.value():
218  attrs = obj.attrs()
219  attrs = map(lambda x: x[0], attrs)
220  if "Lang" in attrs:
221  if prefix:
222  results.append((obj.value(), "%s.%s" % (prefix, obj.name()), 0))
223  else:
224  results.append((obj.value(), "%s" % (obj.name()), 0))
225  if obj.child():
226  if prefix:
227  results = results + self.descendHDF(obj.child(), "%s.%s" % (prefix, obj.name()))
228  else:
229  results = results + self.descendHDF(obj.child(), (obj.name()))
230  obj = obj.next()
231  return results
232 
    def parseHDF(self, data):
        """Extract translatable strings from an HDF file.

        HDF files are handled specially: we only extract entire HDF
        elements which carry the Lang attribute (see descendHDF).
        """
        hdf = neo_util.HDF()
        hdf.readString(data, 1)
        return self.descendHDF(hdf, "")
239 
240  def handleFile(self, file):
241  if file in self.ignore_files: return []
242  for a_re in self.ignore_patterns:
243  if re.match(a_re,file):
244  return []
245  fpath = self.root + '/' + file
246  x = string.rfind(file, '.')
247  if x == -1: return []
248  data = open(fpath, 'r').read()
249  ext = file[x:]
250  strings = []
251  if ext in ['.cst', '.cs']:
252  strings = self.parseCS(data)
253  elif ext in ['.html', '.htm']:
254  strings = self.parseHTML(data)
255  elif ext in ['.hdf']:
256  strings = self.parseHDF(data)
257  if len(strings):
258  print "Found %d strings in %s" % (len(strings), file)
259  return strings
260  return []
261 
262  def walkDirectory(self, path):
263  if path in self.ignore_paths: return []
264  fpath = self.root + '/' + path
265  files = os.listdir(fpath)
266  dirs = []
267  results = []
268  for file in files:
269  if file[0] == '.': continue
270  fname = fpath + '/' + file
271  if os.path.isdir(fname):
272  dirs.append(file)
273  else:
274  strings = self.handleFile(path + '/' + file)
275  if len(strings):
276  results.append((path + '/' + file, strings))
277  for dir in dirs:
278  if dir not in ["release"]:
279  results = results + self.walkDirectory(path + '/' + dir)
280  return results
281 
282  def cleanHtmlString(self, s):
283  s = re.sub("\s+", " ", s)
284  return string.strip(s)
285 
286  def containsWords(self, s, ishtml):
287  if ishtml:
288  s = string.replace(s, '&nbsp;', ' ')
289  s = string.replace(s, '&quot;', '"')
290  s = string.replace(s, '&copy;', '')
291  s = string.replace(s, '&lt;', '<')
292  s = string.replace(s, '&gt;', '>')
293  s = string.replace(s, '&amp;', '&')
294  for x in range (len (s)):
295  n = ord(s[x])
296  if (n>47 and n<58) or (n>64 and n<91) or (n>96 and n<123): return 1
297  return 0
298 
    def findString(self, s):
        """Return the string_id for s, inserting a new row if unknown.

        Raises eTransError if the strings table already contains s more
        than once.
        """
        rows = self.tdb.strings.fetchRows( ('string', s) )
        if len(rows) == 0:
            # New string: create the row and let the DB assign an id.
            row = self.tdb.strings.newRow()
            row.string = s
            row.save()
            return row.string_id
        elif len(rows) > 1:
            raise eTransError, "String %s exists multiple times!" % s
        else:
            return rows[0].string_id
310 
    def loadStrings(self, one_file=None, verbose=0):
        """Extract strings from templates and sync them to the database.

        Scans one_file if given, otherwise the whole 'tmpl' tree.  Cleans
        and de-duplicates the strings, writes a gettext-ish "map" file in
        the current directory for reference, inserts unknown strings into
        the DB, and returns a list of TransLoc records.

        NOTE(review): `verbose` is currently unused.
        """
        if one_file is not None:
            strings = self.handleFile(one_file)
            results = [(one_file, strings)]
        else:
            results = self.walkDirectory('tmpl')
        uniq = {}
        cnt = 0
        seen_hdf = {}
        for fname, strings in results:
            for (s, ofs, ishtml) in strings:
                if s and string.strip(s):
                    l = len(s)
                    if ishtml:
                        s = self.cleanHtmlString(s)
                    if self.containsWords(s, ishtml):
                        # A string-typed ofs marks an HDF-sourced string;
                        # warn when one HDF name maps to different text.
                        if type(ofs) == type(""): # HDF
                            if seen_hdf.has_key(ofs):
                                if seen_hdf[ofs][0] != s:
                                    log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
                            else:
                                seen_hdf[ofs] = (s, fname)
                        try:
                            uniq[s].append((fname, ofs, l))
                        except KeyError:
                            uniq[s] = [(fname, ofs, l)]
                        cnt = cnt + 1
        print "%d strings, %d unique" % (cnt, len(uniq.keys()))
        # Dump a human-readable location map alongside the run.
        fp = open("map", 'w')
        for (s, locs) in uniq.items():
            locs = map(lambda x: "%s:%s:%d" % x, locs)
            fp.write('#: %s\n' % (string.join(locs, ',')))
            fp.write('msgid=%s\n\n' % repr(s))

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in uniq.items():
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if type(ofs) == type(""): # ie, its HDF
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                loc_r = TransLoc(s_id, fname, location)
                locations.append(loc_r)
        return locations
357 
358  def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
359  hdf = neo_util.HDF()
360  if exist and lang == 'en': return hdf
361  done = {}
362  locations.sort()
363  maps = self.tdb.maps.fetchRows( ('lang', lang) )
364  maps_d = {}
365  for map in maps:
366  maps_d[int(map.string_id)] = map
367  strings = self.tdb.strings.fetchRows()
368  strings_d = {}
369  for string in strings:
370  strings_d[int(string.string_id)] = string
371  count = 0
372  for loc in locations:
373  s_id = int(loc.string_id)
374  if done.has_key(s_id): continue
375  try:
376  s_row = maps_d[s_id]
377  if exist: continue
378  except KeyError:
379  try:
380  s_row = strings_d[s_id]
381  except KeyError:
382  log("Missing string_id %d, skipping" % s_id)
383  continue
384  count = count + 1
385  if tiered:
386  hdf.setValue("%s.%d.%d.%s" % (prefix, int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), s_row.string)
387  else:
388  hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
389  done[s_id] = 1
390  if exist == 1: log("Missing %d strings for lang %s" % (count, lang))
391  return hdf
392 
    def dumpStrings(self, locations, lang=None):
        """Write strings_<lang>.hdf and strings_missing_<lang>.hdf files.

        With lang=None, dumps English plus every language present in the
        nt_trans_maps table; otherwise just the given language.
        """
        log("Dumping strings to HDF")
        if lang is None:
            langs = ['en']
            sql = "select lang from nt_trans_maps group by lang"
            cursor = self.tdb.defaultCursor()
            cursor.execute(sql)
            rows = cursor.fetchall()
            for row in rows:
                langs.append(row[0])
        else:
            langs = [lang]

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang)
            hdf.writeFile("strings_%s.hdf" % a_lang)

        # Separately dump only the strings still missing a translation.
        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
            if hdf.child():
                hdf.writeFile("strings_missing_%s.hdf" % a_lang)
414 
415  def fetchString(self, s_id, lang):
416  if lang == "hdf":
417  return "<?cs var:Lang.Extracted.%d.%d.%s ?>" % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id)
418  rows = self.tdb.maps.fetchRows( [('string_id', s_id), ('lang', lang)] )
419  if len(rows) == 0:
420  try:
421  row = self.tdb.strings.fetchRow( ('string_id', s_id) )
422  except odb.eNoMatchingRows:
423  log("Unable to find string id %s" % s_id)
424  raise eNoString
425  if lang != 'en':
426  log("Untranslated string for id %s" % s_id)
427  return row.string
428  else:
429  return rows[0].string
430 
    def dumpFiles(self, locations, lang):
        """Regenerate template files under <root>/gen with strings replaced.

        For lang == "hdf", string text is replaced by ClearSilver
        references into the tiered Lang.Extracted tree, and per-language
        lang_<lang>.hdf plus lang_map.hdf files are written for every
        configured language.
        """
        log("Dumping files for %s" % lang)
        # Group TransLoc records by filename.
        files = {}
        for row in locations:
            try:
                files[row.filename].append(row)
            except KeyError:
                files[row.filename] = [row]

        hdf_map = []

        os.system("rm -rf %s/gen/tmpl" % (self.root))
        for file in files.keys():
            fname = "%s/gen/%s" % (self.root, file)
            try:
                os.makedirs(os.path.dirname(fname))
            except OSError, reason:
                # errno 17 == EEXIST: an existing directory is fine.
                if reason[0] != 17:
                    raise
            do_hdf = 0
            x = string.rfind(file, '.')
            if x != -1 and file[x:] == '.hdf':
                do_hdf = 1
            ofs = []
            for loc in files[file]:
                # location is "ofs:<offset>:<length>" or "hdf:<name>".
                parts = string.split(loc.location, ':')
                if len(parts) == 3 and parts[0] == 'ofs' and do_hdf == 0:
                    ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and do_hdf == 1:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    log("Invalid location for loc_id %s" % loc.loc_id)
                    continue
            if not do_hdf:
                # Splice replacement strings into the file contents at
                # each recorded (offset, length).
                ofs.sort()
                data = open(self.root + '/' + file).read()
                # ok, now we split up the original data into sections
                x = 0
                n = len(data)
                out = []
                #sys.stderr.write("%s\n" % repr(ofs))
                while len(ofs):
                    if ofs[0][0] > x:
                        # Copy untouched text up to the next string.
                        out.append(data[x:ofs[0][0]])
                        x = ofs[0][0]
                    elif ofs[0][0] == x:
                        # Emit the (translated) replacement and skip the
                        # original string's length.
                        out.append(self.fetchString(ofs[0][2], lang))
                        x = ofs[0][0] + ofs[0][1]
                        ofs = ofs[1:]
                    else:
                        # Overlapping/backwards offset: abort this file.
                        log("How did we get here? %s x=%d ofs=%d sid=%d" % (file, x, ofs[0][0], ofs[0][2]))
                        log("Data[x:20]: %s" % data[x:20])
                        log("Data[ofs:20]: %s" % data[ofs[0][0]:20])
                        break
                if n > x:
                    out.append(data[x:])
                odata = string.join(out, '')
                open(fname, 'w').write(odata)

        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
            # dumping the extracted strings
            hdf = self.stringsHDF('Lang.Extracted', locations, d_lang, tiered=1)
            fname = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
            hdf.writeFile(fname)
            data = open(fname).read()
            fp = open(fname, 'w')
            fp.write('## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n')
            fp.write(data)
            fp.write('\n#include "lang_map.hdf"\n')

            # dumping the hdf strings file
            if d_lang == "en":
                map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
            else:
                map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
            try:
                os.makedirs(os.path.dirname(map_file))
            except OSError, reason:
                if reason[0] != 17: raise
            map_hdf = neo_util.HDF()
            for (name, s_id) in hdf_map:
                str = hdf.getValue('Lang.Extracted.%d.%d.%s' % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), '')
                map_hdf.setValue(name, str)
            map_hdf.writeFile(map_file)
520 
521  def loadMap(self, file, prefix, lang):
522  log("Loading map for language %s" % lang)
523  hdf = neo_util.HDF()
524  hdf.readFile(file)
525  obj = hdf.getChild(prefix)
526  updates = 0
527  new_r = 0
528  while obj is not None:
529  s_id = obj.name()
530  str = obj.value()
531 
532  try:
533  map_r = self.tdb.maps.fetchRow( [('string_id', s_id), ('lang', lang)])
534  except odb.eNoMatchingRows:
535  map_r = self.tdb.maps.newRow()
536  map_r.string_id = s_id
537  map_r.lang = lang
538  new_r = new_r + 1
539 
540  if map_r.string != str:
541  updates = updates + 1
542  map_r.string = str
543  map_r.save()
544 
545  obj = obj.next()
546  log("New maps: %d Updates: %d" % (new_r, updates - new_r))
547 
548 
def main(argv):
    """Command-line entry point.

    Flags: -f FILE (scan a single file), -v N (verbosity), --load FILE
    with --lang LL (import a translation map), --help.  Default run:
    extract strings, sync the DB, and regenerate templates.
    """
    alist, args = getopt.getopt(argv[1:], "f:v:", ["help", "load=", "lang="])

    one_file = None
    verbose = 0
    load_file = None
    lang = 'en'
    for (field, val) in alist:
        if field == "--help":
            # BUGFIX: usage() takes no required arguments; the original
            # usage(argv[0]) call raised TypeError.
            usage()
            return -1
        if field == "-f":
            one_file = val
        if field == "-v":
            verbose = int(val)
        if field == "--load":
            load_file = val
        if field == "--lang":
            lang = val

    global DONE

    #signal.signal(signal.SIGTERM, handleSignal)
    #signal.signal(signal.SIGINT, handleSignal)

    log("trans: start")

    start_time = time.time()

    try:
        t = Translator()
        if load_file:
            t.loadMap(load_file, 'S', lang)
        else:
            locations = t.loadStrings(one_file, verbose=verbose)
            t.dumpStrings(locations)
            t.dumpFiles(locations, 'hdf')
    except KeyboardInterrupt:
        pass
    except:
        # Broad catch is deliberate: report any failure via the project's
        # error handler instead of a bare traceback.
        import handle_error
        handle_error.handleException("Translation Error")

if __name__ == "__main__":
    main(sys.argv)
def __init__(self, string_id, filename, location)
Definition: trans.py:42
def loadStrings(self, one_file=None, verbose=0)
Definition: trans.py:311
def loadMap(self, file, prefix, lang)
Definition: trans.py:521
def parseHTMLTag(self, data)
Definition: trans.py:87
def log(args)
Definition: log.py:107
def descendHDF(self, obj, prefix)
Definition: trans.py:214
def dumpStrings(self, locations, lang=None)
Definition: trans.py:393
def containsWords(self, s, ishtml)
Definition: trans.py:286
def fetchString(self, s_id, lang)
Definition: trans.py:415
def parseHTML(self, data, reset=1)
Definition: trans.py:132
def handleSignal(arg)
Definition: trans.py:23
def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0)
Definition: trans.py:358
def dumpFiles(self, locations, lang)
Definition: trans.py:431


pyclearsilver
Author(s): Scott Hassan
autogenerated on Mon Jun 10 2019 15:51:13