5 Calculate Root-mean-square deviation (RMSD) of Two Molecules Using Rotation 
    6 =========================================================================== 
    8 Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ 
    9 or PDB format, using transformation and rotation. The order of the atoms *must* 
   10 be the same for both structures. 
   12 For more information, usage, example and citation read more at 
   13 https://github.com/charnley/rmsd 
   34     Rotate matrix P unto Q using Kabsch algorithm and calculate the RMSD. 
   39         (N,D) matrix, where N is points and D is dimension. 
   41         (N,D) matrix, where N is points and D is dimension. 
   46         root-mean squared deviation 
   54     Rotate matrix P unto matrix Q using Kabsch algorithm. 
   59         (N,D) matrix, where N is points and D is dimension. 
   61         (N,D) matrix, where N is points and D is dimension. 
   66         (N,D) matrix, where N is points and D is dimension, 
   79     The optimal rotation matrix U is calculated and then used to rotate matrix 
   80     P unto matrix Q so the minimum root-mean-square deviation (RMSD) can be 
   83     Using the Kabsch algorithm with two sets of paired point P and Q, centered 
   84     around the centroid. Each vector set is represented as an NxD 
   85     matrix, where D is the the dimension of the space. 
   87     The algorithm works in three steps: 
   88     - a translation of P and Q 
   89     - the computation of a covariance matrix C 
   90     - computation of the optimal rotation matrix U 
   92     http://en.wikipedia.org/wiki/Kabsch_algorithm 
   97         (N,D) matrix, where N is points and D is dimension. 
   99         (N,D) matrix, where N is points and D is dimension. 
  104         Rotation matrix (D,D) 
  113     C = np.dot(np.transpose(P), Q)
 
  122     V, S, W = np.linalg.svd(C)
 
  123     d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0
 
  137     Rotate matrix P unto Q and calculate the RMSD 
  139     based on doi:10.1016/1049-9660(91)90036-O 
  144         (N,D) matrix, where N is points and D is dimension. 
  146         (N,D) matrix, where N is points and D is dimension. 
  160     note: translation will be zero when the centroids of each molecule are the 
  165     rot = Wt_r.dot(Q_r)[:3, :3]
 
  171     matrix involved in quaternion rotation 
  177              [-r1, -r2, -r3, r4]])
 
  183     matrix involved in quaternion rotation 
  189              [-r1, -r2, -r3, r4]])
 
  195     Calculate the rotation 
  200         (N,D) matrix, where N is points and D is dimension. 
  202         (N,D) matrix, where N is points and D is dimension. 
  207         Rotation matrix (D,D) 
  210     W = np.asarray([
makeW(*Y[k]) 
for k 
in range(N)])
 
  211     Q = np.asarray([
makeQ(*X[k]) 
for k 
in range(N)])
 
  212     Qt_dot_W = np.asarray([np.dot(Q[k].T, W[k]) 
for k 
in range(N)])
 
  213     W_minus_Q = np.asarray([W[k] - Q[k] 
for k 
in range(N)])
 
  214     A = np.sum(Qt_dot_W, axis=0)
 
  215     eigen = np.linalg.eigh(A)
 
  216     r = eigen[1][:, eigen[0].argmax()]
 
  223     Calculate the centroid from a vectorset X. 
  225     https://en.wikipedia.org/wiki/Centroid 
  226     Centroid is the mean position of all the points in all of the coordinate 
  234         (N,D) matrix, where N is points and D is dimension. 
  248     Calculate Root-mean-square deviation from two sets of vectors V and W. 
  253         (N,D) matrix, where N is points and D is dimension. 
  255         (N,D) matrix, where N is points and D is dimension. 
  260         Root-mean-square deviation 
  266     for v, w 
in zip(V, W):
 
  267         rmsd += sum([(v[i] - w[i])**2.0 
for i 
in range(D)])
 
  268     return np.sqrt(rmsd/N)
 
  273     Print coordinates V with corresponding atoms to stdout in XYZ format. 
  280         (N,3) matrix of atomic coordinates 
  281     title : string (optional) 
  292         atom = atom[0].upper() + atom[1:]
 
  293         print(
"{0:2s} {1:15.8f} {2:15.8f} {3:15.8f}".format(
 
  294                 atom, V[i, 0], V[i, 1], V[i, 2]))
 
  299     Get coordinates from filename in format fmt. Supports XYZ and PDB. 
  306         Format of filename. Either xyz or pdb. 
  313         (N,3) where N is number of atoms 
  320     exit(
"Could not recognize file format: {:s}".format(fmt))
 
  325     Get coordinates from the first chain in a pdb file 
  326     and return a vectorset with all the coordinates. 
  338         (N,3) where N is number of atoms 
  354     with open(filename, 
'r') 
as f:
 
  355         lines = f.readlines()
 
  357             if line.startswith(
"TER") 
or line.startswith(
"END"):
 
  359             if line.startswith(
"ATOM"):
 
  360                 tokens = line.split()
 
  364                     if atom 
in (
"H", 
"C", 
"N", 
"O", 
"S", 
"P"):
 
  374                         exit(
"Error parsing atomtype for the following line: \n{0:s}".format(line))
 
  379                         for i, x 
in enumerate(tokens):
 
  380                             if "." in x 
and "." in tokens[i + 1] 
and "." in tokens[i + 2]:
 
  384                         exit(
"Error parsing coordinates for the following line: \n{0:s}".format(line))
 
  387                     V.append(np.asarray(tokens[x_column:x_column + 3], dtype=float))
 
  394                         V.append(np.asarray([x, y ,z], dtype=float))
 
  396                         exit(
"Error parsing input for the following line: \n{0:s}".format(line))
 
  400     atoms = np.asarray(atoms)
 
  401     assert(V.shape[0] == atoms.size)
 
  407     Get coordinates from filename and return a vectorset with all the 
  408     coordinates, in XYZ format. 
  420         (N,3) where N is number of atoms 
  424     f = open(filename, 
'r')
 
  431         n_atoms = int(f.readline())
 
  433         exit(
"Could not obtain the number of atoms in the .xyz file.")
 
  439     for lines_read, line 
in enumerate(f):
 
  441         if lines_read == n_atoms:
 
  444         atom = re.findall(
r'[a-zA-Z]+', line)[0]
 
  447         numbers = re.findall(
r'[-]?\d+\.\d*(?:[Ee][-\+]\d+)?', line)
 
  448         numbers = [float(number) 
for number 
in numbers]
 
  451         if len(numbers) == 3:
 
  452             V.append(np.array(numbers))
 
  455             exit(
"Reading the .xyz file failed in line {0}. Please check the format.".format(lines_read + 2))
 
  458     atoms = np.array(atoms)
 
  469 Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ 
  470 or PDB format, using transformation and rotation. The order of the atoms *must* 
  471 be the same for both structures. 
  473 For more information, usage, example and citation read more at 
  474 https://github.com/charnley/rmsd 
  478   Normal - RMSD calculated the straight-forward way, no translation or rotation. 
  479   Kabsch - RMSD after coordinates are translated and rotated using Kabsch. 
  480   Quater - RMSD after coordinates are translated and rotated using quaternions. 
  483     parser = argparse.ArgumentParser(
 
  484                     usage=
'%(prog)s [options] structure_a structure_b',
 
  485                     description=description,
 
  486                     formatter_class=argparse.RawDescriptionHelpFormatter,
 
  489     parser.add_argument(
'-v', 
'--version', action=
'version', version=
'rmsd ' + __version__ + 
"\nhttps://github.com/charnley/rmsd")
 
  491     parser.add_argument(
'structure_a', metavar=
'structure_a', type=str, help=
'Structure in .xyz or .pdb format')
 
  492     parser.add_argument(
'structure_b', metavar=
'structure_b', type=str)
 
  493     parser.add_argument(
'-o', 
'--output', action=
'store_true', help=
'print out structure A, centered and rotated unto structure B\'s coordinates in XYZ format')
 
  494     parser.add_argument(
'-f', 
'--format', action=
'store', help=
'Format of input files. Valid format are XYZ and PDB', metavar=
'fmt')
 
  496     parser.add_argument(
'-m', 
'--normal', action=
'store_true', help=
'Use no transformation')
 
  497     parser.add_argument(
'-k', 
'--kabsch', action=
'store_true', help=
'Use Kabsch algorithm for transformation')
 
  498     parser.add_argument(
'-q', 
'--quater', action=
'store_true', help=
'Use Quaternion algorithm for transformation')
 
  500     index_group = parser.add_mutually_exclusive_group()
 
  501     index_group.add_argument(
'-n', 
'--no-hydrogen', action=
'store_true', help=
'ignore hydrogens when calculating RMSD')
 
  502     index_group.add_argument(
'-r', 
'--remove-idx', nargs=
'+', type=int, help=
'index list of atoms NOT to consider', metavar=
'idx')
 
  503     index_group.add_argument(
'-a', 
'--add-idx', nargs=
'+', type=int, help=
'index list of atoms to consider', metavar=
'idx')
 
  506     if len(sys.argv) == 1:
 
  510     args = parser.parse_args()
 
  513     if not args.normal 
and not args.kabsch 
and not args.quater:
 
  519     if args.format == 
None:
 
  520         args.format = args.structure_a.split(
'.')[-1]
 
  525     if np.count_nonzero(p_atoms != q_atoms):
 
  526         exit(
"Atoms not in the same order")
 
  529         not_hydrogens = np.where(p_atoms != 
'H')
 
  530         P = p_all[not_hydrogens]
 
  531         Q = q_all[not_hydrogens]
 
  533     elif args.remove_idx:
 
  536         index = set(index) - set(args.remove_idx)
 
  542         P = p_all[args.add_idx]
 
  543         Q = q_all[args.add_idx]
 
  551     if args.normal 
and not args.output:
 
  552         normal_rmsd = 
rmsd(P, Q)
 
  553         print(
"Normal RMSD: {0}".format(normal_rmsd))
 
  566         p_all = np.dot(p_all, U)
 
  568         write_coordinates(p_atoms, p_all, title=
"{} translated".format(args.structure_a))
 
  578 if __name__ == 
"__main__":