calculate_rmsd_kabsch.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 __doc__ = \
3 """
4 
5 Calculate Root-mean-square deviation (RMSD) of Two Molecules Using Rotation
6 ===========================================================================
7 
8 Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ
9 or PDB format, using transformation and rotation. The order of the atoms *must*
10 be the same for both structures.
11 
12 For more information, usage, example and citation read more at
13 https://github.com/charnley/rmsd
14 
15 """
16 
17 __version__ = '1.2.5'
18 
19 
20 import numpy as np
21 import re
22 
23 
24 # Python 2/3 compatibility
25 # Make range a iterator in Python 2
26 try:
27  range = xrange
28 except NameError:
29  pass
30 
31 
32 def kabsch_rmsd(P, Q):
33  """
34  Rotate matrix P unto Q using Kabsch algorithm and calculate the RMSD.
35 
36  Parameters
37  ----------
38  P : array
39  (N,D) matrix, where N is points and D is dimension.
40  Q : array
41  (N,D) matrix, where N is points and D is dimension.
42 
43  Returns
44  -------
45  rmsd : float
46  root-mean squared deviation
47  """
48  P = kabsch_rotate(P, Q)
49  return rmsd(P, Q)
50 
51 
52 def kabsch_rotate(P, Q):
53  """
54  Rotate matrix P unto matrix Q using Kabsch algorithm.
55 
56  Parameters
57  ----------
58  P : array
59  (N,D) matrix, where N is points and D is dimension.
60  Q : array
61  (N,D) matrix, where N is points and D is dimension.
62 
63  Returns
64  -------
65  P : array
66  (N,D) matrix, where N is points and D is dimension,
67  rotated
68 
69  """
70  U = kabsch(P, Q)
71 
72  # Rotate P
73  P = np.dot(P, U)
74  return P
75 
76 
77 def kabsch(P, Q):
78  """
79  The optimal rotation matrix U is calculated and then used to rotate matrix
80  P unto matrix Q so the minimum root-mean-square deviation (RMSD) can be
81  calculated.
82 
83  Using the Kabsch algorithm with two sets of paired point P and Q, centered
84  around the centroid. Each vector set is represented as an NxD
85  matrix, where D is the the dimension of the space.
86 
87  The algorithm works in three steps:
88  - a translation of P and Q
89  - the computation of a covariance matrix C
90  - computation of the optimal rotation matrix U
91 
92  http://en.wikipedia.org/wiki/Kabsch_algorithm
93 
94  Parameters
95  ----------
96  P : array
97  (N,D) matrix, where N is points and D is dimension.
98  Q : array
99  (N,D) matrix, where N is points and D is dimension.
100 
101  Returns
102  -------
103  U : matrix
104  Rotation matrix (D,D)
105 
106  Example
107  -----
108  TODO
109 
110  """
111 
112  # Computation of the covariance matrix
113  C = np.dot(np.transpose(P), Q)
114 
115  # Computation of the optimal rotation matrix
116  # This can be done using singular value decomposition (SVD)
117  # Getting the sign of the det(V)*(W) to decide
118  # whether we need to correct our rotation matrix to ensure a
119  # right-handed coordinate system.
120  # And finally calculating the optimal rotation matrix U
121  # see http://en.wikipedia.org/wiki/Kabsch_algorithm
122  V, S, W = np.linalg.svd(C)
123  d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0
124 
125  if d:
126  S[-1] = -S[-1]
127  V[:, -1] = -V[:, -1]
128 
129  # Create Rotation matrix U
130  U = np.dot(V, W)
131 
132  return U
133 
134 
135 def quaternion_rmsd(P, Q):
136  """
137  Rotate matrix P unto Q and calculate the RMSD
138 
139  based on doi:10.1016/1049-9660(91)90036-O
140 
141  Parameters
142  ----------
143  P : array
144  (N,D) matrix, where N is points and D is dimension.
145  P : array
146  (N,D) matrix, where N is points and D is dimension.
147 
148  Returns
149  -------
150  rmsd : float
151  """
152  rot = quaternion_rotate(P, Q)
153  P = np.dot(P, rot)
154  return rmsd(P, Q)
155 
156 
158  """
159  Get optimal rotation
160  note: translation will be zero when the centroids of each molecule are the
161  same
162  """
163  Wt_r = makeW(*r).T
164  Q_r = makeQ(*r)
165  rot = Wt_r.dot(Q_r)[:3, :3]
166  return rot
167 
168 
169 def makeW(r1, r2, r3, r4=0):
170  """
171  matrix involved in quaternion rotation
172  """
173  W = np.asarray([
174  [r4, r3, -r2, r1],
175  [-r3, r4, r1, r2],
176  [r2, -r1, r4, r3],
177  [-r1, -r2, -r3, r4]])
178  return W
179 
180 
181 def makeQ(r1, r2, r3, r4=0):
182  """
183  matrix involved in quaternion rotation
184  """
185  Q = np.asarray([
186  [r4, -r3, r2, r1],
187  [r3, r4, -r1, r2],
188  [-r2, r1, r4, r3],
189  [-r1, -r2, -r3, r4]])
190  return Q
191 
192 
194  """
195  Calculate the rotation
196 
197  Parameters
198  ----------
199  X : array
200  (N,D) matrix, where N is points and D is dimension.
201  Y: array
202  (N,D) matrix, where N is points and D is dimension.
203 
204  Returns
205  -------
206  rot : matrix
207  Rotation matrix (D,D)
208  """
209  N = X.shape[0]
210  W = np.asarray([makeW(*Y[k]) for k in range(N)])
211  Q = np.asarray([makeQ(*X[k]) for k in range(N)])
212  Qt_dot_W = np.asarray([np.dot(Q[k].T, W[k]) for k in range(N)])
213  W_minus_Q = np.asarray([W[k] - Q[k] for k in range(N)])
214  A = np.sum(Qt_dot_W, axis=0)
215  eigen = np.linalg.eigh(A)
216  r = eigen[1][:, eigen[0].argmax()]
217  rot = quaternion_transform(r)
218  return rot
219 
220 
221 def centroid(X):
222  """
223  Calculate the centroid from a vectorset X.
224 
225  https://en.wikipedia.org/wiki/Centroid
226  Centroid is the mean position of all the points in all of the coordinate
227  directions.
228 
229  C = sum(X)/len(X)
230 
231  Parameters
232  ----------
233  X : array
234  (N,D) matrix, where N is points and D is dimension.
235 
236  Returns
237  -------
238  C : float
239  centeroid
240 
241  """
242  C = X.mean(axis=0)
243  return C
244 
245 
246 def rmsd(V, W):
247  """
248  Calculate Root-mean-square deviation from two sets of vectors V and W.
249 
250  Parameters
251  ----------
252  V : array
253  (N,D) matrix, where N is points and D is dimension.
254  W : array
255  (N,D) matrix, where N is points and D is dimension.
256 
257  Returns
258  -------
259  rmsd : float
260  Root-mean-square deviation
261 
262  """
263  D = len(V[0])
264  N = len(V)
265  rmsd = 0.0
266  for v, w in zip(V, W):
267  rmsd += sum([(v[i] - w[i])**2.0 for i in range(D)])
268  return np.sqrt(rmsd/N)
269 
270 
271 def write_coordinates(atoms, V, title=""):
272  """
273  Print coordinates V with corresponding atoms to stdout in XYZ format.
274 
275  Parameters
276  ----------
277  atoms : list
278  List of atomic types
279  V : array
280  (N,3) matrix of atomic coordinates
281  title : string (optional)
282  Title of molecule
283 
284  """
285  N, D = V.shape
286 
287  print(str(N))
288  print(title)
289 
290  for i in range(N):
291  atom = atoms[i]
292  atom = atom[0].upper() + atom[1:]
293  print("{0:2s} {1:15.8f} {2:15.8f} {3:15.8f}".format(
294  atom, V[i, 0], V[i, 1], V[i, 2]))
295 
296 
297 def get_coordinates(filename, fmt):
298  """
299  Get coordinates from filename in format fmt. Supports XYZ and PDB.
300 
301  Parameters
302  ----------
303  filename : string
304  Filename to read
305  fmt : string
306  Format of filename. Either xyz or pdb.
307 
308  Returns
309  -------
310  atoms : list
311  List of atomic types
312  V : array
313  (N,3) where N is number of atoms
314 
315  """
316  if fmt == "xyz":
317  return get_coordinates_xyz(filename)
318  elif fmt == "pdb":
319  return get_coordinates_pdb(filename)
320  exit("Could not recognize file format: {:s}".format(fmt))
321 
322 
323 def get_coordinates_pdb(filename):
324  """
325  Get coordinates from the first chain in a pdb file
326  and return a vectorset with all the coordinates.
327 
328  Parameters
329  ----------
330  filename : string
331  Filename to read
332 
333  Returns
334  -------
335  atoms : list
336  List of atomic types
337  V : array
338  (N,3) where N is number of atoms
339 
340  """
341  # PDB files tend to be a bit of a mess. The x, y and z coordinates
342  # are supposed to be in column 31-38, 39-46 and 47-54, but this is
343  # not always the case.
344  # Because of this the three first columns containing a decimal is used.
345  # Since the format doesn't require a space between columns, we use the
346  # above column indices as a fallback.
347  x_column = None
348  V = list()
349  # Same with atoms and atom naming.
350  # The most robust way to do this is probably
351  # to assume that the atomtype is given in column 3.
352  atoms = list()
353 
354  with open(filename, 'r') as f:
355  lines = f.readlines()
356  for line in lines:
357  if line.startswith("TER") or line.startswith("END"):
358  break
359  if line.startswith("ATOM"):
360  tokens = line.split()
361  # Try to get the atomtype
362  try:
363  atom = tokens[2][0]
364  if atom in ("H", "C", "N", "O", "S", "P"):
365  atoms.append(atom)
366  else:
367  # e.g. 1HD1
368  atom = tokens[2][1]
369  if atom == "H":
370  atoms.append(atom)
371  else:
372  raise Exception
373  except:
374  exit("Error parsing atomtype for the following line: \n{0:s}".format(line))
375 
376  if x_column == None:
377  try:
378  # look for x column
379  for i, x in enumerate(tokens):
380  if "." in x and "." in tokens[i + 1] and "." in tokens[i + 2]:
381  x_column = i
382  break
383  except IndexError:
384  exit("Error parsing coordinates for the following line: \n{0:s}".format(line))
385  # Try to read the coordinates
386  try:
387  V.append(np.asarray(tokens[x_column:x_column + 3], dtype=float))
388  except:
389  # If that doesn't work, use hardcoded indices
390  try:
391  x = line[30:38]
392  y = line[38:46]
393  z = line[46:54]
394  V.append(np.asarray([x, y ,z], dtype=float))
395  except:
396  exit("Error parsing input for the following line: \n{0:s}".format(line))
397 
398 
399  V = np.asarray(V)
400  atoms = np.asarray(atoms)
401  assert(V.shape[0] == atoms.size)
402  return atoms, V
403 
404 
405 def get_coordinates_xyz(filename):
406  """
407  Get coordinates from filename and return a vectorset with all the
408  coordinates, in XYZ format.
409 
410  Parameters
411  ----------
412  filename : string
413  Filename to read
414 
415  Returns
416  -------
417  atoms : list
418  List of atomic types
419  V : array
420  (N,3) where N is number of atoms
421 
422  """
423 
424  f = open(filename, 'r')
425  V = list()
426  atoms = list()
427  n_atoms = 0
428 
429  # Read the first line to obtain the number of atoms to read
430  try:
431  n_atoms = int(f.readline())
432  except ValueError:
433  exit("Could not obtain the number of atoms in the .xyz file.")
434 
435  # Skip the title line
436  f.readline()
437 
438  # Use the number of atoms to not read beyond the end of a file
439  for lines_read, line in enumerate(f):
440 
441  if lines_read == n_atoms:
442  break
443 
444  atom = re.findall(r'[a-zA-Z]+', line)[0]
445  atom = atom.upper()
446 
447  numbers = re.findall(r'[-]?\d+\.\d*(?:[Ee][-\+]\d+)?', line)
448  numbers = [float(number) for number in numbers]
449 
450  # The numbers are not valid unless we obtain exacly three
451  if len(numbers) == 3:
452  V.append(np.array(numbers))
453  atoms.append(atom)
454  else:
455  exit("Reading the .xyz file failed in line {0}. Please check the format.".format(lines_read + 2))
456 
457  f.close()
458  atoms = np.array(atoms)
459  V = np.array(V)
460  return atoms, V
461 
462 
463 def main():
464 
465  import argparse
466  import sys
467 
468  description = """
469 Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ
470 or PDB format, using transformation and rotation. The order of the atoms *must*
471 be the same for both structures.
472 
473 For more information, usage, example and citation read more at
474 https://github.com/charnley/rmsd
475 """
476 
477  epilog = """output:
478  Normal - RMSD calculated the straight-forward way, no translation or rotation.
479  Kabsch - RMSD after coordinates are translated and rotated using Kabsch.
480  Quater - RMSD after coordinates are translated and rotated using quaternions.
481 """
482 
483  parser = argparse.ArgumentParser(
484  usage='%(prog)s [options] structure_a structure_b',
485  description=description,
486  formatter_class=argparse.RawDescriptionHelpFormatter,
487  epilog=epilog)
488 
489  parser.add_argument('-v', '--version', action='version', version='rmsd ' + __version__ + "\nhttps://github.com/charnley/rmsd")
490 
491  parser.add_argument('structure_a', metavar='structure_a', type=str, help='Structure in .xyz or .pdb format')
492  parser.add_argument('structure_b', metavar='structure_b', type=str)
493  parser.add_argument('-o', '--output', action='store_true', help='print out structure A, centered and rotated unto structure B\'s coordinates in XYZ format')
494  parser.add_argument('-f', '--format', action='store', help='Format of input files. Valid format are XYZ and PDB', metavar='fmt')
495 
496  parser.add_argument('-m', '--normal', action='store_true', help='Use no transformation')
497  parser.add_argument('-k', '--kabsch', action='store_true', help='Use Kabsch algorithm for transformation')
498  parser.add_argument('-q', '--quater', action='store_true', help='Use Quaternion algorithm for transformation')
499 
500  index_group = parser.add_mutually_exclusive_group()
501  index_group.add_argument('-n', '--no-hydrogen', action='store_true', help='ignore hydrogens when calculating RMSD')
502  index_group.add_argument('-r', '--remove-idx', nargs='+', type=int, help='index list of atoms NOT to consider', metavar='idx')
503  index_group.add_argument('-a', '--add-idx', nargs='+', type=int, help='index list of atoms to consider', metavar='idx')
504 
505 
506  if len(sys.argv) == 1:
507  parser.print_help()
508  sys.exit(1)
509 
510  args = parser.parse_args()
511 
512  # As default use all three methods
513  if not args.normal and not args.kabsch and not args.quater:
514  args.normal = True
515  args.kabsch = True
516  args.quater = True
517 
518  # As default, load the extension as format
519  if args.format == None:
520  args.format = args.structure_a.split('.')[-1]
521 
522  p_atoms, p_all = get_coordinates(args.structure_a, args.format)
523  q_atoms, q_all = get_coordinates(args.structure_b, args.format)
524 
525  if np.count_nonzero(p_atoms != q_atoms):
526  exit("Atoms not in the same order")
527 
528  if args.no_hydrogen:
529  not_hydrogens = np.where(p_atoms != 'H')
530  P = p_all[not_hydrogens]
531  Q = q_all[not_hydrogens]
532 
533  elif args.remove_idx:
534  N, = p_atoms.shape
535  index = range(N)
536  index = set(index) - set(args.remove_idx)
537  index = list(index)
538  P = p_all[index]
539  Q = q_all[index]
540 
541  elif args.add_idx:
542  P = p_all[args.add_idx]
543  Q = q_all[args.add_idx]
544 
545  else:
546  P = p_all[:]
547  Q = q_all[:]
548 
549 
550  # Calculate 'dumb' RMSD
551  if args.normal and not args.output:
552  normal_rmsd = rmsd(P, Q)
553  print("Normal RMSD: {0}".format(normal_rmsd))
554 
555  # Create the centroid of P and Q which is the geometric center of a
556  # N-dimensional region and translate P and Q onto that center.
557  # http://en.wikipedia.org/wiki/Centroid
558  Pc = centroid(P)
559  Qc = centroid(Q)
560  P -= Pc
561  Q -= Qc
562 
563  if args.output:
564  U = kabsch(P, Q)
565  p_all -= Pc
566  p_all = np.dot(p_all, U)
567  p_all += Qc
568  write_coordinates(p_atoms, p_all, title="{} translated".format(args.structure_a))
569  quit()
570 
571  if args.kabsch:
572  print("Kabsch RMSD: {0}".format(kabsch_rmsd(P, Q)))
573 
574  if args.quater:
575  print("Quater RMSD: {0}".format(quaternion_rmsd(P, Q)))
576 
577 
578 if __name__ == "__main__":
579  main()
def makeW(r1, r2, r3, r4=0)
def get_coordinates(filename, fmt)
def makeQ(r1, r2, r3, r4=0)
static std::string print(const transformation &tf)
static const textual_icon exit
Definition: model-views.h:254
def write_coordinates(atoms, V, title="")


librealsense2
Author(s): Sergey Dorodnicov , Doron Hirshberg , Mark Horn , Reagan Lopez , Itay Carpis
autogenerated on Mon May 3 2021 02:45:07