cnpy.cpp
Go to the documentation of this file.
1 //Copyright (C) 2011 Carl Rogers
2 //Released under MIT License
3 //license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4 
5 #include <complex>
6 #include <cstdlib>
7 #include <algorithm>
8 #include <cstring>
9 #include <iomanip>
10 #include <stdint.h>
11 #include <stdexcept>
12 #include <regex>
13 
14 #include <cnpy/cnpy.h>
15 
17  int x = 1;
18  return (((char *) &x)[0]) ? '<' : '>';
19 }
20 
21 char cnpy::map_type(const std::type_info &t) {
22  if (t == typeid(float)) return 'f';
23  if (t == typeid(double)) return 'f';
24  if (t == typeid(long double)) return 'f';
25 
26  if (t == typeid(int)) return 'i';
27  if (t == typeid(char)) return 'i';
28  if (t == typeid(short)) return 'i';
29  if (t == typeid(long)) return 'i';
30  if (t == typeid(long long)) return 'i';
31 
32  if (t == typeid(unsigned char)) return 'u';
33  if (t == typeid(unsigned short)) return 'u';
34  if (t == typeid(unsigned long)) return 'u';
35  if (t == typeid(unsigned long long)) return 'u';
36  if (t == typeid(unsigned int)) return 'u';
37 
38  if (t == typeid(bool)) return 'b';
39 
40  if (t == typeid(std::complex < float > )) return 'c';
41  if (t == typeid(std::complex < double > )) return 'c';
42  if (t == typeid(std::complex < long double > )) return 'c';
43 
44  else return '?';
45 }
46 
47 template<>
48 std::vector<char> &cnpy::operator+=(std::vector<char> &lhs, const std::string rhs) {
49  lhs.insert(lhs.end(), rhs.begin(), rhs.end());
50  return lhs;
51 }
52 
53 template<>
54 std::vector<char> &cnpy::operator+=(std::vector<char> &lhs, const char *rhs) {
55  //write in little endian
56  size_t len = strlen(rhs);
57  lhs.reserve(len);
58  for (size_t byte = 0; byte < len; byte++) {
59  lhs.push_back(rhs[byte]);
60  }
61  return lhs;
62 }
63 
64 void
65 cnpy::parse_npy_header(unsigned char *buffer, size_t &word_size, std::vector <size_t> &shape, bool &fortran_order) {
66  //std::string magic_string(buffer,6);
67  uint8_t major_version = *reinterpret_cast<uint8_t *>(buffer + 6);
68  uint8_t minor_version = *reinterpret_cast<uint8_t *>(buffer + 7);
69  uint16_t header_len = *reinterpret_cast<uint16_t *>(buffer + 8);
70  std::string header(reinterpret_cast<char *>(buffer + 9), header_len);
71 
72  size_t loc1, loc2;
73 
74  //fortran order
75  loc1 = header.find("fortran_order") + 16;
76  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
77 
78  //shape
79  loc1 = header.find("(");
80  loc2 = header.find(")");
81 
82  std::regex num_regex("[0-9][0-9]*");
83  std::smatch sm;
84  shape.clear();
85 
86  std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
87  while (std::regex_search(str_shape, sm, num_regex)) {
88  shape.push_back(std::stoi(sm[0].str()));
89  str_shape = sm.suffix().str();
90  }
91 
92  //endian, word size, data type
93  //byte order code | stands for not applicable.
94  //not sure when this applies except for byte array
95  loc1 = header.find("descr") + 9;
96  bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
97  assert(littleEndian);
98 
99  //char type = header[loc1+1];
100  //assert(type == map_type(T));
101 
102  std::string str_ws = header.substr(loc1 + 2);
103  loc2 = str_ws.find("'");
104  word_size = atoi(str_ws.substr(0, loc2).c_str());
105 }
106 
107 void cnpy::parse_npy_header(FILE *fp, size_t &word_size, std::vector <size_t> &shape, bool &fortran_order) {
108  char buffer[256];
109  size_t res = fread(buffer, sizeof(char), 11, fp);
110  if (res != 11)
111  throw std::runtime_error("parse_npy_header: failed fread");
112  std::string header = fgets(buffer, 256, fp);
113  assert(header[header.size() - 1] == '\n');
114 
115  size_t loc1, loc2;
116 
117  //fortran order
118  loc1 = header.find("fortran_order");
119  if (loc1 == std::string::npos)
120  throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
121  loc1 += 16;
122  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
123 
124  //shape
125  loc1 = header.find("(");
126  loc2 = header.find(")");
127  if (loc1 == std::string::npos || loc2 == std::string::npos)
128  throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
129 
130  std::regex num_regex("[0-9][0-9]*");
131  std::smatch sm;
132  shape.clear();
133 
134  std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
135  while (std::regex_search(str_shape, sm, num_regex)) {
136  shape.push_back(std::stoi(sm[0].str()));
137  str_shape = sm.suffix().str();
138  }
139 
140  //endian, word size, data type
141  //byte order code | stands for not applicable.
142  //not sure when this applies except for byte array
143  loc1 = header.find("descr");
144  if (loc1 == std::string::npos)
145  throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
146  loc1 += 9;
147  bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
148  assert(littleEndian);
149 
150  //char type = header[loc1+1];
151  //assert(type == map_type(T));
152 
153  std::string str_ws = header.substr(loc1 + 2);
154  loc2 = str_ws.find("'");
155  word_size = atoi(str_ws.substr(0, loc2).c_str());
156 }
157 
158 void cnpy::parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset) {
159  std::vector<char> footer(22);
160  fseek(fp, -22, SEEK_END);
161  size_t res = fread(&footer[0], sizeof(char), 22, fp);
162  if (res != 22)
163  throw std::runtime_error("parse_zip_footer: failed fread");
164 
165  uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
166  disk_no = *(uint16_t * ) & footer[4];
167  disk_start = *(uint16_t * ) & footer[6];
168  nrecs_on_disk = *(uint16_t * ) & footer[8];
169  nrecs = *(uint16_t * ) & footer[10];
170  global_header_size = *(uint32_t * ) & footer[12];
171  global_header_offset = *(uint32_t * ) & footer[16];
172  comment_len = *(uint16_t * ) & footer[20];
173 
174  assert(disk_no == 0);
175  assert(disk_start == 0);
176  assert(nrecs_on_disk == nrecs);
177  assert(comment_len == 0);
178 }
179 
181  std::vector <size_t> shape;
182  size_t word_size;
183  bool fortran_order;
184  cnpy::parse_npy_header(fp, word_size, shape, fortran_order);
185 
186  cnpy::NpyArray arr(shape, word_size, fortran_order);
187  size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
188  if (nread != arr.num_bytes())
189  throw std::runtime_error("load_the_npy_file: failed fread");
190  return arr;
191 }
192 
193 cnpy::NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, uint32_t uncompr_bytes) {
194 
195  std::vector<unsigned char> buffer_compr(compr_bytes);
196  std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
197  size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
198  if (nread != compr_bytes)
199  throw std::runtime_error("load_the_npy_file: failed fread");
200 
201  int err;
202  z_stream d_stream;
203 
204  d_stream.zalloc = Z_NULL;
205  d_stream.zfree = Z_NULL;
206  d_stream.opaque = Z_NULL;
207  d_stream.avail_in = 0;
208  d_stream.next_in = Z_NULL;
209  err = inflateInit2(&d_stream, -MAX_WBITS);
210 
211  d_stream.avail_in = compr_bytes;
212  d_stream.next_in = &buffer_compr[0];
213  d_stream.avail_out = uncompr_bytes;
214  d_stream.next_out = &buffer_uncompr[0];
215 
216  err = inflate(&d_stream, Z_FINISH);
217  err = inflateEnd(&d_stream);
218 
219  std::vector <size_t> shape;
220  size_t word_size;
221  bool fortran_order;
222  cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order);
223 
224  cnpy::NpyArray array(shape, word_size, fortran_order);
225 
226  size_t offset = uncompr_bytes - array.num_bytes();
227  memcpy(array.data<unsigned char>(), &buffer_uncompr[0] + offset, array.num_bytes());
228 
229  return array;
230 }
231 
232 cnpy::npz_t cnpy::npz_load(std::string fname) {
233  FILE *fp = fopen(fname.c_str(), "rb");
234 
235  if (!fp) {
236  throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
237  }
238 
239  cnpy::npz_t arrays;
240 
241  while (1) {
242  std::vector<char> local_header(30);
243  size_t headerres = fread(&local_header[0], sizeof(char), 30, fp);
244  if (headerres != 30)
245  throw std::runtime_error("npz_load: failed fread");
246 
247  //if we've reached the global header, stop reading
248  if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
249 
250  //read in the variable name
251  uint16_t name_len = *(uint16_t * ) & local_header[26];
252  std::string varname(name_len, ' ');
253  size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp);
254  if (vname_res != name_len)
255  throw std::runtime_error("npz_load: failed fread");
256 
257  //erase the lagging .npy
258  varname.erase(varname.end() - 4, varname.end());
259 
260  //read in the extra field
261  uint16_t extra_field_len = *(uint16_t * ) & local_header[28];
262  if (extra_field_len > 0) {
263  std::vector<char> buff(extra_field_len);
264  size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
265  if (efield_res != extra_field_len)
266  throw std::runtime_error("npz_load: failed fread");
267  }
268 
269  uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0] + 8);
270  uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + 18);
271  uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + 22);
272 
273  if (compr_method == 0) { arrays[varname] = load_the_npy_file(fp); }
274  else { arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes); }
275  }
276 
277  fclose(fp);
278  return arrays;
279 }
280 
281 cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
282  FILE *fp = fopen(fname.c_str(), "rb");
283 
284  if (!fp) throw std::runtime_error("npz_load: Unable to open file " + fname);
285 
286  while (1) {
287  std::vector<char> local_header(30);
288  size_t header_res = fread(&local_header[0], sizeof(char), 30, fp);
289  if (header_res != 30)
290  throw std::runtime_error("npz_load: failed fread");
291 
292  //if we've reached the global header, stop reading
293  if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
294 
295  //read in the variable name
296  uint16_t name_len = *(uint16_t * ) & local_header[26];
297  std::string vname(name_len, ' ');
298  size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp);
299  if (vname_res != name_len)
300  throw std::runtime_error("npz_load: failed fread");
301  vname.erase(vname.end() - 4, vname.end()); //erase the lagging .npy
302 
303  //read in the extra field
304  uint16_t extra_field_len = *(uint16_t * ) & local_header[28];
305  fseek(fp, extra_field_len, SEEK_CUR); //skip past the extra field
306 
307  uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0] + 8);
308  uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + 18);
309  uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + 22);
310 
311  if (vname == varname) {
312  NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes,
313  uncompr_bytes);
314  fclose(fp);
315  return array;
316  } else {
317  //skip past the data
318  uint32_t size = *(uint32_t * ) & local_header[22];
319  fseek(fp, size, SEEK_CUR);
320  }
321  }
322 
323  fclose(fp);
324 
325  //if we get here, we haven't found the variable in the file
326  throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname);
327 }
328 
329 cnpy::NpyArray cnpy::npy_load(std::string fname) {
330 
331  FILE *fp = fopen(fname.c_str(), "rb");
332 
333  if (!fp) throw std::runtime_error("npy_load: Unable to open file " + fname);
334 
335  NpyArray arr = load_the_npy_file(fp);
336 
337  fclose(fp);
338  return arr;
339 }
340 
341 
342 
cnpy::npy_load
NpyArray npy_load(std::string fname)
Definition: cnpy.cpp:329
cnpy::NpyArray::data
T * data()
Definition: cnpy.h:37
cnpy::parse_zip_footer
void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
Definition: cnpy.cpp:158
cnpy::map_type
char map_type(const std::type_info &t)
Definition: cnpy.cpp:21
cnpy::npz_t
std::map< std::string, NpyArray > npz_t
Definition: cnpy.h:63
cnpy::npz_load
npz_t npz_load(std::string fname)
Definition: cnpy.cpp:232
cnpy::parse_npy_header
void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order)
Definition: cnpy.cpp:107
cnpy::NpyArray
Definition: cnpy.h:24
cnpy::NpyArray::num_bytes
size_t num_bytes() const
Definition: cnpy.h:52
cnpy.h
cnpy::operator+=
std::vector< char > & operator+=(std::vector< char > &lhs, const T rhs)
Definition: cnpy.h:75
load_the_npy_file
cnpy::NpyArray load_the_npy_file(FILE *fp)
Definition: cnpy.cpp:180
cnpy::BigEndianTest
char BigEndianTest()
Definition: cnpy.cpp:16
load_the_npz_array
cnpy::NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
Definition: cnpy.cpp:193


cnpy
Author(s): Carl Rogers, Peter Mitrano
autogenerated on Tue Mar 1 2022 23:57:46