pdb.c
Go to the documentation of this file.
1 /************************************************************************/
12 /************************************************************************/
13 #include <GKlib.h>
14 
15 /************************************************************************/
23 /************************************************************************/
24 char gk_threetoone(char *res) { /* {{{ */
25  /* make sure the matching works */
26  res[0] = toupper(res[0]);
27  res[1] = toupper(res[1]);
28  res[2] = toupper(res[2]);
29  if(strcmp(res,"ALA") == 0) {
30  return 'A';
31  }
32  else if(strcmp(res,"CYS") == 0) {
33  return 'C';
34  }
35  else if(strcmp(res,"ASP") == 0) {
36  return 'D';
37  }
38  else if(strcmp(res,"GLU") == 0) {
39  return 'E';
40  }
41  else if(strcmp(res,"PHE") == 0) {
42  return 'F';
43  }
44  else if(strcmp(res,"GLY") == 0) {
45  return 'G';
46  }
47  else if(strcmp(res,"HIS") == 0) {
48  return 'H';
49  }
50  else if(strcmp(res,"ILE") == 0) {
51  return 'I';
52  }
53  else if(strcmp(res,"LYS") == 0) {
54  return 'K';
55  }
56  else if(strcmp(res,"LEU") == 0) {
57  return 'L';
58  }
59  else if(strcmp(res,"MET") == 0) {
60  return 'M';
61  }
62  else if(strcmp(res,"ASN") == 0) {
63  return 'N';
64  }
65  else if(strcmp(res,"PRO") == 0) {
66  return 'P';
67  }
68  else if(strcmp(res,"GLN") == 0) {
69  return 'Q';
70  }
71  else if(strcmp(res,"ARG") == 0) {
72  return 'R';
73  }
74  else if(strcmp(res,"SER") == 0) {
75  return 'S';
76  }
77  else if(strcmp(res,"THR") == 0) {
78  return 'T';
79  }
80  else if(strcmp(res,"SCY") == 0) {
81  return 'U';
82  }
83  else if(strcmp(res,"VAL") == 0) {
84  return 'V';
85  }
86  else if(strcmp(res,"TRP") == 0) {
87  return 'W';
88  }
89  else if(strcmp(res,"TYR") == 0) {
90  return 'Y';
91  }
92  else {
93  return 'X';
94  }
95 } /* }}} */
96 
97 /************************************************************************/
104 /************************************************************************/
105 void gk_freepdbf(pdbf *p) { /* {{{ */
106  int i;
107  if(p != NULL) {
108  gk_free((void **)&p->resSeq, LTERM);
109  for(i=0; i<p->natoms; i++) {
110  gk_free((void **)&p->atoms[i].name, &p->atoms[i].resname, LTERM);
111  }
112  for(i=0; i<p->nresidues; i++) {
113  gk_free((void *)&p->threeresSeq[i], LTERM);
114  }
115  /* this may look like it's wrong, but it's just a 1-d array of pointers, and
116  the pointers themselves are freed above */
117  gk_free((void **)&p->bbs, &p->cas, &p->atoms, &p->cm, &p->threeresSeq, LTERM);
118  }
119  gk_free((void **)&p, LTERM);
120 } /* }}} */
121 
122 /************************************************************************/
131 /************************************************************************/
132 pdbf *gk_readpdbfile(char *fname) { /* {{{ */
133  int i=0, res=0;
134  char linetype[6];
135  int aserial;
136  char aname[5] = " \0";
137  char altLoc = ' ';
138  char rname[4] = " \0";
139  char chainid = ' ';
140  char oldchainid = ' ';
141  int rserial;
142  int oldRserial = -37;
143  char icode = ' ';
144  char element = ' ';
145  double x;
146  double y;
147  double z;
148  double avgx;
149  double avgy;
150  double avgz;
151  double opcy;
152  double tmpt;
153  char line[MAXLINELEN];
154  int corruption=0;
155  int nresatoms;
156 
157  int atoms=0, residues=0, cas=0, bbs=0, firstres=1;
158  pdbf *toFill = gk_malloc(sizeof(pdbf),"fillme");
159  FILE *FPIN;
160 
161  FPIN = gk_fopen(fname,"r",fname);
162  while(fgets(line, 256, FPIN)) {
163  sscanf(line,"%s ",linetype);
164  /* It seems the only reliable parts are through temperature, so we only use these parts */
165  /* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */
166  if(strstr(linetype, "ATOM") != NULL) {
167  sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
168  linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
169  sscanf(linetype, " %s ",linetype);
170  sscanf(aname, " %s ",aname);
171  sscanf(rname, " %s ",rname);
172  if(altLoc != ' ') {
173  corruption = corruption|CRP_ALTLOCS;
174  }
175 
176  if(firstres == 1) {
177  oldRserial = rserial;
178  oldchainid = chainid;
179  residues++;
180  firstres = 0;
181  }
182  if(oldRserial != rserial) {
183  residues++;
184  oldRserial = rserial;
185  }
186  if(oldchainid != chainid) {
187  corruption = corruption|CRP_MULTICHAIN;
188  }
189  oldchainid = chainid;
190  atoms++;
191  if(strcmp(aname,"CA") == 0) {
192  cas++;
193  }
194  if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 ||
195  strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
196  bbs++;
197  }
198  }
199  else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
200  break;
201  }
202  }
203  fclose(FPIN);
204 
205  /* printf("File has coordinates for %d atoms in %d residues\n",atoms,residues); */
206  toFill->natoms = atoms;
207  toFill->ncas = cas;
208  toFill->nbbs = bbs;
209  toFill->nresidues = residues;
210  toFill->resSeq = (char *) gk_malloc (residues*sizeof(char),"residue seq");
211  toFill->threeresSeq = (char **)gk_malloc (residues*sizeof(char *),"residue seq");
212  toFill->atoms = (atom *) gk_malloc (atoms*sizeof(atom), "atoms");
213  toFill->bbs = (atom **)gk_malloc ( bbs*sizeof(atom *),"bbs");
214  toFill->cas = (atom **)gk_malloc ( cas*sizeof(atom *),"cas");
215  toFill->cm = (center_of_mass *)gk_malloc(residues*sizeof(center_of_mass),"center of mass");
216  res=0; firstres=1; cas=0; bbs=0; i=0;
217  avgx = 0.0; avgy = 0.0; avgz = 0.0;
218  nresatoms = 0;
219 
220  FPIN = gk_fopen(fname,"r",fname);
221  while(fgets(line, 256, FPIN)) {
222  sscanf(line,"%s ",linetype);
223  /* It seems the only reliable parts are through temperature, so we only use these parts */
224  /* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */
225  if(strstr(linetype, "ATOM") != NULL ) {
226 
227  /* to ensure our memory doesn't get corrupted by the biologists, we only read this far */
228  sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
229  linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
230  sscanf(aname, "%s",aname);
231  sscanf(rname, "%s",rname);
232 
233  if(firstres == 1) {
234  toFill->resSeq[res] = gk_threetoone(rname);
235  toFill->threeresSeq[res] = gk_strdup(rname);
236  oldRserial = rserial;
237  res++;
238  firstres = 0;
239  }
240  if(oldRserial != rserial) {
241  /* we're changing residues. store the center of mass from the last one & reset */
242  toFill->cm[res-1].x = avgx/nresatoms;
243  toFill->cm[res-1].y = avgy/nresatoms;
244  toFill->cm[res-1].z = avgz/nresatoms;
245  avgx = 0.0; avgy = 0.0; avgz = 0.0;
246  nresatoms = 0;
247  toFill->cm[res-1].name = toFill->resSeq[res-1];
248 
249  toFill->threeresSeq[res] = gk_strdup(rname);
250  toFill->resSeq[res] = gk_threetoone(rname);
251  res++;
252  oldRserial = rserial;
253  }
254  avgx += x;
255  avgy += y;
256  avgz += z;
257  nresatoms++;
258 
259  toFill->atoms[i].x = x;
260  toFill->atoms[i].y = y;
261  toFill->atoms[i].z = z;
262  toFill->atoms[i].opcy = opcy;
263  toFill->atoms[i].tmpt = tmpt;
264  toFill->atoms[i].element = element;
265  toFill->atoms[i].serial = aserial;
266  toFill->atoms[i].chainid = chainid;
267  toFill->atoms[i].altLoc = altLoc;
268  toFill->atoms[i].rserial = rserial;
269  toFill->atoms[i].icode = icode;
270  toFill->atoms[i].name = gk_strdup(aname);
271  toFill->atoms[i].resname = gk_strdup(rname);
272  /* Set up pointers for the backbone and c-alpha shortcuts */
273  if(strcmp(aname,"CA") == 0) {
274  toFill->cas[cas] = &(toFill->atoms[i]);
275  cas++;
276  }
277  if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
278  toFill->bbs[bbs] = &(toFill->atoms[i]);
279  bbs++;
280  }
281  i++;
282  }
283  else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
284  break;
285  }
286  }
287  /* get that last average */
288  toFill->cm[res-1].x = avgx/nresatoms;
289  toFill->cm[res-1].y = avgy/nresatoms;
290  toFill->cm[res-1].z = avgz/nresatoms;
291  /* Begin test code */
292  if(cas != residues) {
293  printf("Number of residues and CA coordinates differs by %d (!)\n",residues-cas);
294  if(cas < residues) {
295  corruption = corruption|CRP_MISSINGCA;
296  }
297  else if(cas > residues) {
298  corruption = corruption|CRP_MULTICA;
299  }
300  }
301  if(bbs < residues*4) {
302  corruption = corruption|CRP_MISSINGBB;
303  }
304  else if(bbs > residues*4) {
305  corruption = corruption|CRP_MULTIBB;
306  }
307  fclose(FPIN);
308  toFill->corruption = corruption;
309  /* if(corruption == 0)
310  printf("File was clean!\n"); */
311  return(toFill);
312 } /* }}} */
313 
314 /************************************************************************/
325 /************************************************************************/
326 void gk_writefastafrompdb(pdbf *pb, char *fname) {
327  int i;
328  FILE *FPOUT;
329 
330  FPOUT = gk_fopen(fname,"w",fname);
331  fprintf(FPOUT,"> %s\n",fname);
332 
333  for(i=0; i<pb->nresidues; i++)
334  fprintf(FPOUT,"%c",pb->resSeq[i]);
335 
336  fprintf(FPOUT,"\n");
337  fclose(FPOUT);
338 }
339 
340 /************************************************************************/
349 /************************************************************************/
350 void gk_writecentersofmass(pdbf *p, char *fname) {
351  int i;
352  FILE *FPIN;
353  FPIN = gk_fopen(fname,"w",fname);
354  for(i=0; i<p->nresidues; i++) {
355  fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
356  "ATOM ",i,"CA",' ',p->threeresSeq[i],' ',i,' ',p->cm[i].x,p->cm[i].y,p->cm[i].z,1.0,-37.0);
357  }
358  fclose(FPIN);
359 }
360 
361 /************************************************************************/
370 /************************************************************************/
371 void gk_writefullatom(pdbf *p, char *fname) {
372  int i;
373  FILE *FPIN;
374  FPIN = gk_fopen(fname,"w",fname);
375  for(i=0; i<p->natoms; i++) {
376  fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
377  "ATOM ",p->atoms[i].serial,p->atoms[i].name,p->atoms[i].altLoc,p->atoms[i].resname,p->atoms[i].chainid,p->atoms[i].rserial,p->atoms[i].icode,p->atoms[i].x,p->atoms[i].y,p->atoms[i].z,p->atoms[i].opcy,p->atoms[i].tmpt);
378  }
379  fclose(FPIN);
380 }
381 
382 /************************************************************************/
391 /************************************************************************/
392 void gk_writebackbone(pdbf *p, char *fname) {
393  int i;
394  FILE *FPIN;
395  FPIN = gk_fopen(fname,"w",fname);
396  for(i=0; i<p->nbbs; i++) {
397  fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
398  "ATOM ",p->bbs[i]->serial,p->bbs[i]->name,p->bbs[i]->altLoc,p->bbs[i]->resname,p->bbs[i]->chainid,p->bbs[i]->rserial,p->bbs[i]->icode,p->bbs[i]->x,p->bbs[i]->y,p->bbs[i]->z,p->bbs[i]->opcy,p->bbs[i]->tmpt);
399  }
400  fclose(FPIN);
401 }
402 
403 /************************************************************************/
412 /************************************************************************/
413 void gk_writealphacarbons(pdbf *p, char *fname) {
414  int i;
415  FILE *FPIN;
416  FPIN = gk_fopen(fname,"w",fname);
417  for(i=0; i<p->ncas; i++) {
418  fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
419  "ATOM ",p->cas[i]->serial,p->cas[i]->name,p->cas[i]->altLoc,p->cas[i]->resname,p->cas[i]->chainid,p->cas[i]->rserial,p->cas[i]->icode,p->cas[i]->x,p->cas[i]->y,p->cas[i]->z,p->cas[i]->opcy,p->cas[i]->tmpt);
420  }
421  fclose(FPIN);
422 }
423 
424 /************************************************************************/
434 /************************************************************************/
436  int corruption = p->corruption;
437  if(corruption&CRP_ALTLOCS)
438  printf("Multiple coordinate sets for at least one atom\n");
439  if(corruption&CRP_MISSINGCA)
440  printf("Missing coordiantes for at least one CA atom\n");
441  if(corruption&CRP_MISSINGBB)
442  printf("Missing coordiantes for at least one backbone atom (N,CA,C,O)\n");
443  if(corruption&CRP_MULTICHAIN)
444  printf("File contains coordinates for multiple chains\n");
445  if(corruption&CRP_MULTICA)
446  printf("Multiple CA atoms found for the same residue (could be alternate locators)\n");
447  if(corruption&CRP_MULTICA)
448  printf("Multiple copies of backbone atoms found for the same residue (could be alternate locators)\n");
449 }
450  /* sscanf(line, "%6s%5d%*1c%4s%1c%3s%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf%*6c%4s%2s%2s\n",
451  linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,segId,element,charge);
452  printf(".%s.%s.%s.\n",segId,element,charge);
453  printf("%-6s%5d%-1s%-4s%1c%3s%1s%1c%4d%1c%3s%8.3lf%8.3lf%8.3lf%6.2f%6.2f%6s%4s%2s%2s\n",
454  linetype,aserial," ",aname,altLoc,rname," ",chainid,rserial,icode," ",x,y,z,opcy,tmpt," ",segId,element,charge); */
455 
456  /* and we could probably get away with this using astral files, */
457  /* sscanf(line, "%6s%5d%*1c%4s%1c%3s%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf%*6c%6s\n",
458  linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,element);
459  printf("%-6s%5d%-1s%-4s%1c%3s%1s%1c%4d%1c%3s%8.3lf%8.3lf%8.3lf%6.2f%6.2f%6s%6s\n",
460  linetype,aserial," ",aname,altLoc,rname," ",chainid,rserial,icode," ",x,y,z,opcy,tmpt," ",element); */
double y
Definition: gk_struct.h:158
FILE * gk_fopen(char *, char *, const char *)
Definition: GKlib/io.c:24
#define CRP_MULTICHAIN
Definition: gk_defs.h:30
int nbbs
Definition: gk_struct.h:184
double opcy
Definition: gk_struct.h:160
double x
Definition: gk_struct.h:157
#define CRP_MISSINGCA
Definition: gk_defs.h:28
int serial
Definition: gk_struct.h:149
char icode
Definition: gk_struct.h:155
void gk_writecentersofmass(pdbf *p, char *fname)
Writes all centers of mass in pdb-format to file fname.
Definition: pdb.c:350
void gk_writefastafrompdb(pdbf *pb, char *fname)
Writes the sequence of residues from a pdb file.
Definition: pdb.c:326
Scalar * y
char * resname
Definition: gk_struct.h:152
pdbf * gk_readpdbfile(char *fname)
Reads a pdb file into a pdbf structure.
Definition: pdb.c:132
atom ** bbs
Definition: gk_struct.h:189
#define CRP_MISSINGBB
Definition: gk_defs.h:29
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
char * name
Definition: gk_struct.h:150
char chainid
Definition: gk_struct.h:153
int corruption
Definition: gk_struct.h:185
void gk_showcorruption(pdbf *p)
Decodes the corruption bitswitch and prints any problems.
Definition: pdb.c:435
center_of_mass * cm
Definition: gk_struct.h:191
char element
Definition: gk_struct.h:156
#define CRP_ALTLOCS
Definition: gk_defs.h:27
#define NULL
Definition: ccolamd.c:609
char * gk_strdup(char *orgstr)
Duplicates a string.
Definition: string.c:372
void gk_writebackbone(pdbf *p, char *fname)
Writes out all the backbone atoms of a structure in pdb format.
Definition: pdb.c:392
void gk_freepdbf(pdbf *p)
Frees the memory of a pdbf structure.
Definition: pdb.c:105
double tmpt
Definition: gk_struct.h:161
int natoms
Definition: gk_struct.h:181
void gk_writealphacarbons(pdbf *p, char *fname)
Writes out all the alpha carbon atoms of a structure.
Definition: pdb.c:413
#define MAXLINELEN
Definition: gk_defs.h:34
void * gk_malloc(size_t nbytes, char *msg)
Definition: memory.c:140
void gk_free(void **ptr1,...)
Definition: memory.c:202
int nresidues
Definition: gk_struct.h:182
char gk_threetoone(char *res)
Converts three-letter amino acid codes to one-leter codes.
Definition: pdb.c:24
float * p
char ** threeresSeq
Definition: gk_struct.h:187
char * resSeq
Definition: gk_struct.h:186
#define CRP_MULTICA
Definition: gk_defs.h:31
char altLoc
Definition: gk_struct.h:151
atom * atoms
Definition: gk_struct.h:188
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
int rserial
Definition: gk_struct.h:154
atom ** cas
Definition: gk_struct.h:190
void gk_writefullatom(pdbf *p, char *fname)
Writes all atoms in p in pdb-format to file fname.
Definition: pdb.c:371
int ncas
Definition: gk_struct.h:183
double z
Definition: gk_struct.h:159
#define LTERM
Definition: gk_defs.h:14
#define CRP_MULTIBB
Definition: gk_defs.h:32


gtsam
Author(s):
autogenerated on Sat May 8 2021 02:43:25