// Copyright (C) 2011 Carl Rogers
// Released under MIT License
// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php

#ifndef LIBCNPY_H_
#define LIBCNPY_H_

#if 0
#include <zlib.h>
#endif

#include <stdint.h>

#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <vector>

namespace cnpy {

// In-memory view of a loaded .npy array: raw bytes plus shape/type metadata.
struct NpyArray
{
    NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order, std::string _typeName)
        : shape(_shape)
        , word_size(_word_size)
        , fortran_order(_fortran_order)
        , typeName(_typeName)
    {
        num_vals = 1;
        for (size_t i = 0; i < shape.size(); i++)
            num_vals *= shape[i];
        data_holder = std::shared_ptr<std::vector<char>>(new std::vector<char>(num_vals * word_size));
    }

    NpyArray()
        : shape(0)
        , word_size(0)
        , fortran_order(0)
        , num_vals(0)
    {}

    template <typename T>
    T* data()
    {
        return reinterpret_cast<T*>(&(*data_holder)[0]);
    }

    template <typename T>
    const T* data() const
    {
        return reinterpret_cast<const T*>(&(*data_holder)[0]);
    }

    template <typename T>
    std::vector<T> as_vec() const
    {
        const T* p = data<T>();
        return std::vector<T>(p, p + num_vals);
    }

    size_t num_bytes() const { return data_holder->size(); }

    std::shared_ptr<std::vector<char>> data_holder;
    std::vector<size_t> shape;
    size_t word_size;
    bool fortran_order;
    size_t num_vals;
    std::string typeName;
};

// Map from array name to array, as returned by npz_load().
using npz_t = std::map<std::string, NpyArray>;

char BigEndianTest(int size);
char map_type(const std::type_info& t);
template <typename T>
std::vector<char> create_npy_header(const std::vector<size_t>& shape);
void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
                      std::string& typeName);
void parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
                      std::string& typeName);
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);

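/*
 * Typical round trip through the save/load API above. Illustrative sketch only:
 * it assumes this header is included as "cnpy.h" (name assumed) and that the
 * matching cnpy implementation file is compiled and linked in; error handling
 * is omitted.
 *
 *   #include "cnpy.h"
 *   #include <vector>
 *
 *   int main()
 *   {
 *       std::vector<float> src(12, 1.0f);
 *       cnpy::npy_save("arr.npy", &src[0], {3, 4}, "w"); // write a 3x4 float32 array
 *
 *       cnpy::NpyArray arr = cnpy::npy_load("arr.npy");  // read it back
 *       const float* p = arr.data<float>();              // raw view of the payload
 *       std::vector<float> v = arr.as_vec<float>();      // or copy into a vector
 *       return (p[0] == v[0]) ? 0 : 1;
 *   }
 */
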
template <typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs)
{
    // write in little endian
    for (size_t byte = 0; byte < sizeof(T); byte++) {
        char val = *((char*)&rhs + byte);
        lhs.push_back(val);
    }
    return lhs;
}

template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);

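// For example, on a little-endian host the operator+= overloads above behave as:
//   std::vector<char> buf;
//   buf += "PK";             // appends 'P', 'K' (const char* specialization, defined in the implementation file)
//   buf += (uint16_t)0x0403; // appends bytes 0x03, 0x04 (low byte first)
// which is how the ZIP local header signature "PK\x03\x04" is assembled in npz_save() below.
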
template <typename T>
int npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w")
{
    if (mode != "a") {
        // probe that the destination is writable; skipped when appending because
        // std::ios::out truncates an existing file
        std::ofstream ofs(fname, std::ios::out);
        if (!ofs.is_open()) {
            return -1;
        }
        ofs.close();
    }
    FILE* fp = NULL;
    std::vector<size_t> true_data_shape; // if appending, the shape of existing + new data

    if (mode == "a")
        fp = fopen(fname.c_str(), "r+b");

    if (fp) {
        // file exists. we need to append to it. read the header, modify the array size
        size_t word_size;
        bool fortran_order;
        std::string typeName;
        parse_npy_header(fp, word_size, true_data_shape, fortran_order, typeName);
        assert(!fortran_order);

        if (word_size != sizeof(T)) {
            std::cout << "libnpy error: " << fname << " has word size " << word_size << " but npy_save appending data sized "
                      << sizeof(T) << "\n";
            assert(word_size == sizeof(T));
        }
        if (true_data_shape.size() != shape.size()) {
            std::cout << "libnpy error: npy_save attempting to append misdimensioned data to " << fname << "\n";
            assert(true_data_shape.size() == shape.size());
        }

        for (size_t i = 1; i < shape.size(); i++) {
            if (shape[i] != true_data_shape[i]) {
                std::cout << "libnpy error: npy_save attempting to append misshaped data to " << fname << "\n";
                assert(shape[i] == true_data_shape[i]);
            }
        }
        true_data_shape[0] += shape[0];
    } else {
        fp = fopen(fname.c_str(), "wb");
        true_data_shape = shape;
    }
    if (!fp) {
        return -1;
    }

    std::vector<char> header = create_npy_header<T>(true_data_shape);
    size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());

    // rewrite the header (with the updated shape when appending), then append the new data
    fseek(fp, 0, SEEK_SET);
    fwrite(&header[0], sizeof(char), header.size(), fp);
    fseek(fp, 0, SEEK_END);
    fwrite(data, sizeof(T), nels, fp);
    fclose(fp);
    return 0;
}

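// Appending with mode "a" grows the leading dimension of an existing .npy file.
// Illustrative sketch (batch1/batch2 are assumed to point to float data of the stated shapes):
//   cnpy::npy_save("log.npy", batch1, {100, 16}, "w"); // creates a 100x16 float array
//   cnpy::npy_save("log.npy", batch2, {50, 16},  "a"); // file now holds 150x16
// All dimensions except the first must match the data already in the file.
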
template <typename T>
void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape,
              std::string mode = "w")
{
    // first, append a .npy to the fname
    fname += ".npy";

    // now, on with the show
    FILE* fp = NULL;
    uint16_t nrecs = 0;
    size_t global_header_offset = 0;
    std::vector<char> global_header;

    if (mode == "a")
        fp = fopen(zipname.c_str(), "r+b");

    if (fp) {
        // zip file exists. we need to add a new npy file to it.
        // first read the footer. this gives us the offset and size of the global header
        // then read and store the global header.
        // below, we will write the new data at the start of the global header then append the global header and footer
        // below it
        size_t global_header_size;
        parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
        fseek(fp, global_header_offset, SEEK_SET);
        global_header.resize(global_header_size);
        size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
        if (res != global_header_size) {
            throw std::runtime_error("npz_save: header read error while adding to existing zip");
        }
        fseek(fp, global_header_offset, SEEK_SET);
    } else {
        fp = fopen(zipname.c_str(), "wb");
    }
    if (!fp) {
        throw std::runtime_error("npz_save: unable to open file " + zipname);
    }

    std::vector<char> npy_header = create_npy_header<T>(shape);

    size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
    size_t nbytes = nels * sizeof(T) + npy_header.size();

#if 0
    // get the CRC of the data to be added
    uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
    crc = crc32(crc, (uint8_t*)data, nels * sizeof(T));
#else
    // zlib disabled: store a zero CRC (strict unzip tools may flag the entry)
    uint32_t crc = 0;
#endif

    // build the local header
    std::vector<char> local_header;
    local_header += "PK";                   // first part of sig
    local_header += (uint16_t)0x0403;       // second part of sig
    local_header += (uint16_t)20;           // min version to extract
    local_header += (uint16_t)0;            // general purpose bit flag
    local_header += (uint16_t)0;            // compression method
    local_header += (uint16_t)0;            // file last mod time
    local_header += (uint16_t)0;            // file last mod date
    local_header += (uint32_t)crc;          // crc
    local_header += (uint32_t)nbytes;       // compressed size
    local_header += (uint32_t)nbytes;       // uncompressed size
    local_header += (uint16_t)fname.size(); // fname length
    local_header += (uint16_t)0;            // extra field length
    local_header += fname;

    // build global header
    global_header += "PK";             // first part of sig
    global_header += (uint16_t)0x0201; // second part of sig
    global_header += (uint16_t)20;     // version made by
    global_header.insert(global_header.end(), local_header.begin() + 4, local_header.begin() + 30);
    global_header += (uint16_t)0; // file comment length
    global_header += (uint16_t)0; // disk number where file starts
    global_header += (uint16_t)0; // internal file attributes
    global_header += (uint32_t)0; // external file attributes
    global_header += (uint32_t)
        global_header_offset; // relative offset of local file header, since it begins where the global header used to begin
    global_header += fname;

    // build footer
    std::vector<char> footer;
    footer += "PK";                  // first part of sig
    footer += (uint16_t)0x0605;      // second part of sig
    footer += (uint16_t)0;           // number of this disk
    footer += (uint16_t)0;           // disk where footer starts
    footer += (uint16_t)(nrecs + 1); // number of records on this disk
    footer += (uint16_t)(nrecs + 1); // total number of records
    footer += (uint32_t)global_header.size(); // nbytes of global headers
    footer +=
        (uint32_t)(global_header_offset + nbytes + local_header.size()); // offset of start of global headers, since global
                                                                         // header now starts after newly written array
    footer += (uint16_t)0; // zip file comment length

    // write everything
    fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
    fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
    fwrite(data, sizeof(T), nels, fp);
    fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
    fwrite(&footer[0], sizeof(char), footer.size(), fp);
    fclose(fp);
}

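// Building an .npz archive with several named arrays. Illustrative sketch
// (w and b are assumed to point to float data of the stated shapes):
//   cnpy::npz_save("model.npz", "weights", w, {64, 128}, "w"); // create/overwrite the archive
//   cnpy::npz_save("model.npz", "bias",    b, {128},     "a"); // append a second entry
// npz_load("model.npz") then returns an npz_t mapping each array name to its NpyArray.
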
// convenience overloads: save a std::vector as a one-dimensional array
template <typename T>
void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w")
{
    std::vector<size_t> shape;
    shape.push_back(data.size());
    npy_save(fname, &data[0], shape, mode);
}

template <typename T>
void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w")
{
    std::vector<size_t> shape;
    shape.push_back(data.size());
    npz_save(zipname, fname, &data[0], shape, mode);
}

template <typename T>
std::vector<char> create_npy_header(const std::vector<size_t>& shape)
{
    const char* type_name = typeid(T).name();
    std::vector<char> dict;
    dict += "{'descr': '";
    dict += BigEndianTest(sizeof(T));
    if (std::string(type_name) == "N4rknn7float16E") {
        // Itanium-mangled name of rknn::float16, which map_type() does not recognize;
        // emit the floating-point kind 'f' directly so the descr becomes e.g. 'f2'
        dict += "f";
    } else {
        dict += map_type(typeid(T));
    }
    dict += std::to_string(sizeof(T));
    dict += "', 'fortran_order': False, 'shape': (";
    dict += std::to_string(shape[0]);
    for (size_t i = 1; i < shape.size(); i++) {
        dict += ", ";
        dict += std::to_string(shape[i]);
    }
    if (shape.size() == 1)
        dict += ",";
    dict += "), }";
    // pad with spaces so that the length of preamble+dict is a multiple of 16 bytes.
    // the preamble is 10 bytes and the dict must end with \n
    int remainder = 16 - (10 + dict.size()) % 16;
    dict.insert(dict.end(), remainder, ' ');
    dict.back() = '\n';

    std::vector<char> header;
    header += (char)0x93;
    header += "NUMPY";
    header += (char)0x01; // major version of numpy format
    header += (char)0x00; // minor version of numpy format
    header += (uint16_t)dict.size();
    header.insert(header.end(), dict.begin(), dict.end());

    return header;
}

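// For instance, with T = float and shape {3, 4}, and assuming BigEndianTest()
// yields '<' and map_type(typeid(float)) yields 'f' on this platform, the dict
// built above reads (before space padding to a 16-byte boundary and the trailing '\n'):
//   {'descr': '<f4', 'fortran_order': False, 'shape': (3, 4), }
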
} // namespace cnpy
#endif