seq.c
Go to the documentation of this file.
1 /*
2  *
3  * Sequence handler library by Huzefa Rangwala
4  * Date : 03.01.2007
5  *
6  *
7  *
8  */
9 
10 
11 #include <GKlib.h>
12 
13 
14 
15 
16 /*********************************************************/
17 /* ! \brief Initializes the <tt>gk_seq_t</tt> variable
18 
19 
20 
21 
22 \param A pointer to gk_seq_t itself
23 \returns null
24 */
25 /***********************************************************************/
26 
28 {
29 
30  seq->len = 0;
31  seq->sequence = NULL;
32 
33  seq->pssm = NULL;
34  seq->psfm = NULL;
35 
36  seq->name = NULL;
37 
38 }
39 
40 /***********************************************************************/
46 /*********************************************************************/
47 
49 {
50 
51 
52  int nsymbols;
53  gk_idx_t i;
54  gk_i2cc2i_t *t;
55 
56  nsymbols = strlen(alphabet);
57  t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
58  t->n = nsymbols;
59  t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
60  t->c2i = gk_imalloc(256, "gk_i2c_create_common");
61 
62 
63  gk_cset(256, -1, t->i2c);
64  gk_iset(256, -1, t->c2i);
65 
66  for(i=0;i<nsymbols;i++){
67  t->i2c[i] = alphabet[i];
68  t->c2i[(int)alphabet[i]] = i;
69  }
70 
71  return t;
72 
73 }
74 
75 
76 /*********************************************************************/
82 /********************************************************************/
84 {
85  gk_seq_t *seq;
86  gk_idx_t i, j, ii;
87  size_t ntokens, nbytes, len;
88  FILE *fpin;
89 
90 
91  gk_Tokens_t tokens;
92  static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
93  static int PSSMWIDTH = 20;
94  char *header, line[MAXLINELEN];
95  gk_i2cc2i_t *converter;
96 
97  header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
98 
99  converter = gk_i2cc2i_create_common(AAORDER);
100 
101  gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
102  len --;
103 
104  seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
105  gk_seq_init(seq);
106 
107  seq->len = len;
108  seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
109  seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
110  seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
111 
112  seq->nsymbols = PSSMWIDTH;
113  seq->name = gk_getbasename(filename);
114 
115  fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
116 
117 
118  /* Read the header line */
119  if (fgets(line, MAXLINELEN-1, fpin) == NULL)
120  errexit("Unexpected end of file: %s\n", filename);
121  gk_strtoupper(line);
122  gk_strtokenize(line, " \t\n", &tokens);
123 
124  for (i=0; i<PSSMWIDTH; i++)
125  header[i] = tokens.list[i][0];
126 
127  gk_freetokenslist(&tokens);
128 
129 
130  /* Read the rest of the lines */
131  for (i=0, ii=0; ii<len; ii++) {
132  if (fgets(line, MAXLINELEN-1, fpin) == NULL)
133  errexit("Unexpected end of file: %s\n", filename);
134  gk_strtoupper(line);
135  gk_strtokenize(line, " \t\n", &tokens);
136 
137  seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
138 
139  for (j=0; j<PSSMWIDTH; j++) {
140  seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
141  seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
142  }
143 
144 
145 
146  gk_freetokenslist(&tokens);
147  i++;
148  }
149 
150  seq->len = i; /* Reset the length if certain characters were skipped */
151 
152  gk_free((void **)&header, LTERM);
153  gk_fclose(fpin);
154 
155  return seq;
156 }
157 
158 
159 /**************************************************************************/
165 /**************************************************************************/
167 {
168  gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
169  gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
170  gk_free((void **)&seq->name, &seq->sequence, LTERM);
171  //gk_free((void **)&seq, LTERM);
172  gk_free((void **) &seq, LTERM);
173 
174 }
void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, size_t *r_max_nlntokens, size_t *r_nbytes)
Definition: fs.c:79
FILE * gk_fopen(char *, char *, const char *)
Definition: GKlib/io.c:24
ssize_t gk_idx_t
Definition: gk_types.h:22
void errexit(char *f_str,...)
Definition: error.c:54
void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens)
Definition: tokenizer.c:22
int ** pssm
Definition: gk_struct.h:215
for(size_t i=1;i< poses.size();++i)
int nsymbols
Definition: gk_struct.h:219
int * c2i
Definition: gk_struct.h:202
int len
Definition: gk_struct.h:211
void gk_seq_free(gk_seq_t *seq)
This function frees the memory allocated to the seq structure.
Definition: seq.c:166
char * gk_strtoupper(char *)
Converts a string to upper case.
Definition: string.c:328
char * gk_getbasename(char *path)
Definition: fs.c:135
char * i2c
Definition: gk_struct.h:201
int * sequence
Definition: gk_struct.h:212
void gk_freetokenslist(gk_Tokens_t *tokens)
Definition: tokenizer.c:73
char * name
Definition: gk_struct.h:217
#define NULL
Definition: ccolamd.c:609
gk_seq_t * gk_seq_ReadGKMODPSSM(char *filename)
This function reads a pssm in the format of gkmod pssm.
Definition: seq.c:83
internal::enable_if<!(symbolic::is_symbolic< FirstType >::value||symbolic::is_symbolic< LastType >::value), ArithmeticSequence< typename internal::cleanup_index_type< FirstType >::type, Index > >::type seq(FirstType f, LastType l)
int ** psfm
Definition: gk_struct.h:216
#define MAXLINELEN
Definition: gk_defs.h:34
void * gk_malloc(size_t nbytes, char *msg)
Definition: memory.c:140
void gk_free(void **ptr1,...)
Definition: memory.c:202
char ** list
Definition: gk_struct.h:142
void gk_fclose(FILE *)
Definition: GKlib/io.c:44
size_t len(handle h)
Get the length of a Python object.
Definition: pytypes.h:2244
void gk_seq_init(gk_seq_t *seq)
Definition: seq.c:27
std::ptrdiff_t j
gk_i2cc2i_t * gk_i2cc2i_create_common(char *alphabet)
This function creates the localizations for the various sequences.
Definition: seq.c:48
Point2 t(10, 10)
#define LTERM
Definition: gk_defs.h:14


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:35:41