cnpy.h
Go to the documentation of this file.
1 //Copyright (C) 2011 Carl Rogers
2 //Released under MIT License
3 //license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4 
5 #ifndef LIBCNPY_H_
6 #define LIBCNPY_H_
7 
8 #include<string>
9 #include<stdexcept>
10 #include<sstream>
11 #include<vector>
12 #include<cstdio>
13 #include<typeinfo>
14 #include<iostream>
15 #include<cassert>
16 #include<zlib.h>
17 #include<map>
18 #include<memory>
19 #include<stdint.h>
20 #include<numeric>
21 
22 namespace cnpy {
23 
// In-memory representation of one array: a byte buffer plus the metadata
// needed to interpret it (shape, element size in bytes, storage order).
//
// The buffer is held through a std::shared_ptr, so copying an NpyArray
// copies the metadata but shares the same underlying bytes.
struct NpyArray {
    // Allocates a zero-filled buffer of prod(_shape) * _word_size bytes.
    // An empty _shape yields num_vals == 1 (the empty product).
    NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order) :
        shape(_shape), word_size(_word_size), fortran_order(_fortran_order), num_vals(1)
    {
        for(size_t dim : shape) num_vals *= dim;
        data_holder = std::make_shared<std::vector<char>>(num_vals * word_size);
    }

    // Empty array: no buffer is allocated and every size is zero.
    // Needed so NpyArray can be the mapped type of a std::map.
    NpyArray() : shape(0), word_size(0), fortran_order(false), num_vals(0) { }

    // Start of the buffer viewed as T. No check is made that sizeof(T)
    // matches word_size. Returns nullptr when no buffer has been allocated.
    template<typename T>
    T* data() {
        return data_holder ? reinterpret_cast<T*>(data_holder->data()) : nullptr;
    }

    // Read-only counterpart of data().
    template<typename T>
    const T* data() const {
        return data_holder ? reinterpret_cast<const T*>(data_holder->data()) : nullptr;
    }

    // Copies the first num_vals elements, viewed as T, into a new vector.
    template<typename T>
    std::vector<T> as_vec() const {
        const T* p = data<T>();
        return std::vector<T>(p, p+num_vals);
    }

    // Size of the buffer in bytes; 0 when no buffer has been allocated.
    size_t num_bytes() const {
        return data_holder ? data_holder->size() : 0;
    }

    std::shared_ptr<std::vector<char>> data_holder; // raw bytes, shared between copies
    std::vector<size_t> shape;                      // extent of each dimension
    size_t word_size;                               // bytes per element
    bool fortran_order;                             // storage-order flag passed to the constructor
    size_t num_vals;                                // product of all entries of shape
};
62 
// Maps a string key to an NpyArray; this is the return type of the
// one-argument npz_load declared below.
63  using npz_t = std::map<std::string, NpyArray>;
64 
// Everything below is only declared in this header; the definitions are not
// visible here, so the notes describe how this header uses each function.
//
// BigEndianTest: its char result is appended to the 'descr' field by
// create_npy_header, ahead of the type code.
65  char BigEndianTest();
// map_type: its char result is appended to the 'descr' field by
// create_npy_header, between the BigEndianTest() char and sizeof(T).
66  char map_type(const std::type_info& t);
// create_npy_header: forward declaration; the definition is further down in
// this header.
67  template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape);
// parse_npy_header (FILE* overload): npy_save calls it on an existing file and
// then reads word_size, shape and fortran_order from the out-parameters.
68  void parse_npy_header(FILE* fp,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
// parse_npy_header (buffer overload): NOTE(review): presumably the same parse
// applied to an in-memory header — confirm against the definition.
69  void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
// parse_zip_footer: npz_save calls it on an existing archive and then uses
// nrecs, global_header_size and global_header_offset to locate and re-read
// the archive's global header.
70  void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
// NOTE(review): the three loaders below presumably read a whole .npz archive,
// a single named entry of an archive, and a single .npy file respectively —
// confirm against the definitions.
71  npz_t npz_load(std::string fname);
72  NpyArray npz_load(std::string fname, std::string varname);
73  NpyArray npy_load(std::string fname);
74 
// Appends the object representation of rhs (sizeof(T) bytes, in ascending
// address order) to lhs and returns lhs.
//
// The bytes are emitted in the host's native byte order, so the output is
// little endian only when the host itself is little endian.
template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
    const char* first = reinterpret_cast<const char*>(&rhs);
    lhs.insert(lhs.end(), first, first + sizeof(T));
    return lhs;
}

// Specialisations for string arguments. They are only declared here; their
// definitions live outside this header.
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
86 
87 
88  template<typename T> void npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w") {
89  FILE* fp = NULL;
90  std::vector<size_t> true_data_shape; //if appending, the shape of existing + new data
91 
92  if(mode == "a") fp = fopen(fname.c_str(),"r+b");
93 
94  if(fp) {
95  //file exists. we need to append to it. read the header, modify the array size
96  size_t word_size;
97  bool fortran_order;
98  parse_npy_header(fp,word_size,true_data_shape,fortran_order);
99  assert(!fortran_order);
100 
101  if(word_size != sizeof(T)) {
102  std::cout<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
103  assert( word_size == sizeof(T) );
104  }
105  if(true_data_shape.size() != shape.size()) {
106  std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
107  assert(true_data_shape.size() != shape.size());
108  }
109 
110  for(size_t i = 1; i < shape.size(); i++) {
111  if(shape[i] != true_data_shape[i]) {
112  std::cout<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
113  assert(shape[i] == true_data_shape[i]);
114  }
115  }
116  true_data_shape[0] += shape[0];
117  }
118  else {
119  fp = fopen(fname.c_str(),"wb");
120  true_data_shape = shape;
121  }
122 
123  std::vector<char> header = create_npy_header<T>(true_data_shape);
124  size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
125 
126  fseek(fp,0,SEEK_SET);
127  fwrite(&header[0],sizeof(char),header.size(),fp);
128  fseek(fp,0,SEEK_END);
129  fwrite(data,sizeof(T),nels,fp);
130  fclose(fp);
131  }
132 
133  template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape, std::string mode = "w")
134  {
135  //first, append a .npy to the fname
136  fname += ".npy";
137 
138  //now, on with the show
139  FILE* fp = NULL;
140  uint16_t nrecs = 0;
141  size_t global_header_offset = 0;
142  std::vector<char> global_header;
143 
144  if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
145 
146  if(fp) {
147  //zip file exists. we need to add a new npy file to it.
148  //first read the footer. this gives us the offset and size of the global header
149  //then read and store the global header.
150  //below, we will write the the new data at the start of the global header then append the global header and footer below it
151  size_t global_header_size;
152  parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
153  fseek(fp,global_header_offset,SEEK_SET);
154  global_header.resize(global_header_size);
155  size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
156  if(res != global_header_size){
157  throw std::runtime_error("npz_save: header read error while adding to existing zip");
158  }
159  fseek(fp,global_header_offset,SEEK_SET);
160  }
161  else {
162  fp = fopen(zipname.c_str(),"wb");
163  }
164 
165  std::vector<char> npy_header = create_npy_header<T>(shape);
166 
167  size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
168  size_t nbytes = nels*sizeof(T) + npy_header.size();
169 
170  //get the CRC of the data to be added
171  uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
172  crc = crc32(crc,(uint8_t*)data,nels*sizeof(T));
173 
174  //build the local header
175  std::vector<char> local_header;
176  local_header += "PK"; //first part of sig
177  local_header += (uint16_t) 0x0403; //second part of sig
178  local_header += (uint16_t) 20; //min version to extract
179  local_header += (uint16_t) 0; //general purpose bit flag
180  local_header += (uint16_t) 0; //compression method
181  local_header += (uint16_t) 0; //file last mod time
182  local_header += (uint16_t) 0; //file last mod date
183  local_header += (uint32_t) crc; //crc
184  local_header += (uint32_t) nbytes; //compressed size
185  local_header += (uint32_t) nbytes; //uncompressed size
186  local_header += (uint16_t) fname.size(); //fname length
187  local_header += (uint16_t) 0; //extra field length
188  local_header += fname;
189 
190  //build global header
191  global_header += "PK"; //first part of sig
192  global_header += (uint16_t) 0x0201; //second part of sig
193  global_header += (uint16_t) 20; //version made by
194  global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
195  global_header += (uint16_t) 0; //file comment length
196  global_header += (uint16_t) 0; //disk number where file starts
197  global_header += (uint16_t) 0; //internal file attributes
198  global_header += (uint32_t) 0; //external file attributes
199  global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
200  global_header += fname;
201 
202  //build footer
203  std::vector<char> footer;
204  footer += "PK"; //first part of sig
205  footer += (uint16_t) 0x0605; //second part of sig
206  footer += (uint16_t) 0; //number of this disk
207  footer += (uint16_t) 0; //disk where footer starts
208  footer += (uint16_t) (nrecs+1); //number of records on this disk
209  footer += (uint16_t) (nrecs+1); //total number of records
210  footer += (uint32_t) global_header.size(); //nbytes of global headers
211  footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
212  footer += (uint16_t) 0; //zip file comment length
213 
214  //write everything
215  fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
216  fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
217  fwrite(data,sizeof(T),nels,fp);
218  fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
219  fwrite(&footer[0],sizeof(char),footer.size(),fp);
220  fclose(fp);
221  }
222 
// Convenience overload: saves `data` as a one-dimensional array of
// data.size() elements by forwarding to the pointer-based npy_save.
// data.data() is used rather than &data[0] so an empty vector is valid input.
template<typename T> void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w") {
    const std::vector<size_t> shape(1, data.size());
    npy_save(fname, data.data(), shape, mode);
}
228 
// Convenience overload: stores `data` in the archive `zipname` as a
// one-dimensional array of data.size() elements by forwarding to the
// pointer-based npz_save.
// data.data() is used rather than &data[0] so an empty vector is valid input.
template<typename T> void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w") {
    const std::vector<size_t> shape(1, data.size());
    npz_save(zipname, fname, data.data(), shape, mode);
}
234 
235  template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape) {
236 
237  std::vector<char> dict;
238  dict += "{'descr': '";
239  dict += BigEndianTest();
240  dict += map_type(typeid(T));
241  dict += std::to_string(sizeof(T));
242  dict += "', 'fortran_order': False, 'shape': (";
243  dict += std::to_string(shape[0]);
244  for(size_t i = 1;i < shape.size();i++) {
245  dict += ", ";
246  dict += std::to_string(shape[i]);
247  }
248  if(shape.size() == 1) dict += ",";
249  dict += "), }";
250  //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
251  int remainder = 16 - (10 + dict.size()) % 16;
252  dict.insert(dict.end(),remainder,' ');
253  dict.back() = '\n';
254 
255  std::vector<char> header;
256  header += (char) 0x93;
257  header += "NUMPY";
258  header += (char) 0x01; //major version of numpy format
259  header += (char) 0x00; //minor version of numpy format
260  header += (uint16_t) dict.size();
261  header.insert(header.end(),dict.begin(),dict.end());
262 
263  return header;
264  }
265 
266 
267 }
268 
269 #endif
cnpy::npy_load
NpyArray npy_load(std::string fname)
Definition: cnpy.cpp:329
cnpy::NpyArray::data
T * data()
Definition: cnpy.h:37
cnpy::parse_zip_footer
void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
Definition: cnpy.cpp:158
cnpy::NpyArray::NpyArray
NpyArray()
Definition: cnpy.h:34
cnpy::map_type
char map_type(const std::type_info &t)
Definition: cnpy.cpp:21
cnpy::npz_t
std::map< std::string, NpyArray > npz_t
Definition: cnpy.h:63
cnpy::npy_save
void npy_save(std::string fname, const T *data, const std::vector< size_t > shape, std::string mode="w")
Definition: cnpy.h:88
cnpy::NpyArray::NpyArray
NpyArray(const std::vector< size_t > &_shape, size_t _word_size, bool _fortran_order)
Definition: cnpy.h:25
cnpy
Definition: cnpy.h:22
cnpy::npz_load
npz_t npz_load(std::string fname)
Definition: cnpy.cpp:232
cnpy::parse_npy_header
void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order)
Definition: cnpy.cpp:107
cnpy::NpyArray::num_vals
size_t num_vals
Definition: cnpy.h:60
cnpy::NpyArray
Definition: cnpy.h:24
cnpy::NpyArray::word_size
size_t word_size
Definition: cnpy.h:58
cnpy::NpyArray::num_bytes
size_t num_bytes() const
Definition: cnpy.h:52
cnpy::NpyArray::fortran_order
bool fortran_order
Definition: cnpy.h:59
cnpy::NpyArray::data
const T * data() const
Definition: cnpy.h:42
cnpy::create_npy_header
std::vector< char > create_npy_header(const std::vector< size_t > &shape)
Definition: cnpy.h:235
cnpy::NpyArray::shape
std::vector< size_t > shape
Definition: cnpy.h:57
cnpy::operator+=
std::vector< char > & operator+=(std::vector< char > &lhs, const T rhs)
Definition: cnpy.h:75
cnpy::NpyArray::as_vec
std::vector< T > as_vec() const
Definition: cnpy.h:47
cnpy::NpyArray::data_holder
std::shared_ptr< std::vector< char > > data_holder
Definition: cnpy.h:56
cnpy::BigEndianTest
char BigEndianTest()
Definition: cnpy.cpp:16
cnpy::npz_save
void npz_save(std::string zipname, std::string fname, const T *data, const std::vector< size_t > &shape, std::string mode="w")
Definition: cnpy.h:133


cnpy
Author(s): Carl Rogers, Peter Mitrano
autogenerated on Tue Mar 1 2022 23:57:46