
Merge pull request 'main' (#1) from LXX/RobotKernal-UESTC:main into main

Reviewed-on: http://logzhan.ticp.io:30000/ray/RobotKernal-UESTC/pulls/1
LRD_Develop
ray 2023-12-29 16:15:06 +08:00
commit d8665d317c
454 changed files with 498647 additions and 0 deletions

cnpy.cpp

@@ -0,0 +1,403 @@
// Copyright (C) 2011 Carl Rogers
// Released under MIT License
// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#include "cnpy.h"
#include <stdint.h>
#include <algorithm>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <regex>
#include <stdexcept>
char cnpy::BigEndianTest(int size)
{
if (size == 1)
return '|';
int x = 1;
return (((char*)&x)[0]) ? '<' : '>';
}
char cnpy::map_type(const std::type_info& t)
{
if (t == typeid(float))
return 'f';
if (t == typeid(double))
return 'f';
if (t == typeid(long double))
return 'f';
if (t == typeid(int))
return 'i';
if (t == typeid(char))
return 'i';
if (t == typeid(signed char))
return 'i';
if (t == typeid(short))
return 'i';
if (t == typeid(long))
return 'i';
if (t == typeid(long long))
return 'i';
if (t == typeid(unsigned char))
return 'u';
if (t == typeid(unsigned short))
return 'u';
if (t == typeid(unsigned long))
return 'u';
if (t == typeid(unsigned long long))
return 'u';
if (t == typeid(unsigned int))
return 'u';
if (t == typeid(bool))
return 'b';
if (t == typeid(std::complex<float>))
return 'c';
if (t == typeid(std::complex<double>))
return 'c';
if (t == typeid(std::complex<long double>))
return 'c';
else
return '?';
}
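// Example (for reference, from the numpy dtype descr convention): on a
// little-endian host, float yields descr "<f4", int32_t yields "<i4", and
// std::complex<double> yields "<c16" -- byte order from BigEndianTest,
// kind character from map_type, and width from sizeof.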
template <>
std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs)
{
lhs.insert(lhs.end(), rhs.begin(), rhs.end());
return lhs;
}
template <>
std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs)
{
// append the characters of a C string
size_t len = strlen(rhs);
lhs.reserve(lhs.size() + len);
for (size_t byte = 0; byte < len; byte++) {
lhs.push_back(rhs[byte]);
}
return lhs;
}
void cnpy::parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
std::string& typeName)
{
// std::string magic_string(buffer,6);
uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer + 6);
uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer + 7);
uint16_t header_len = *reinterpret_cast<uint16_t*>(buffer + 8);
std::string header(reinterpret_cast<char*>(buffer + 9), header_len);
size_t loc1, loc2;
// fortran order
loc1 = header.find("fortran_order") + 16;
fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
if (fortran_order)
throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)");
// shape
loc1 = header.find("(");
loc2 = header.find(")");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
while (std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
// endian, word size, data type
// byte order code | stands for not applicable.
// not sure when this applies except for byte array
loc1 = header.find("descr") + 9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
// char type = header[loc1+1];
// assert(type == map_type(T));
std::string str_ws = header.substr(loc1 + 2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0, loc2).c_str());
if (header.substr(loc1 + 1, 1) == "i") {
typeName = "int";
} else if (header.substr(loc1 + 1, 1) == "u") {
typeName = "uint";
} else if (header.substr(loc1 + 1, 1) == "f") {
typeName = "float";
}
typeName = typeName + std::to_string(word_size * 8);
}
void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
std::string& typeName)
{
char buffer[256];
size_t res = fread(buffer, sizeof(char), 11, fp);
if (res != 11)
throw std::runtime_error("parse_npy_header: failed fread");
char* fgets_ret = fgets(buffer, 256, fp);
if (!fgets_ret)
throw std::runtime_error("parse_npy_header: failed fgets");
std::string header = fgets_ret;
assert(header[header.size() - 1] == '\n');
size_t loc1, loc2;
// fortran order
loc1 = header.find("fortran_order");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
loc1 += 16;
fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
if (fortran_order)
throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)");
// shape
loc1 = header.find("(");
loc2 = header.find(")");
if (loc1 == std::string::npos || loc2 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
while (std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
// endian, word size, data type
// byte order code | stands for not applicable.
// not sure when this applies except for byte array
loc1 = header.find("descr");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
loc1 += 9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
// char type = header[loc1+1];
// assert(type == map_type(T));
std::string str_ws = header.substr(loc1 + 2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0, loc2).c_str());
if (header.substr(loc1 + 1, 1) == "i") {
typeName = "int";
} else if (header.substr(loc1 + 1, 1) == "u") {
typeName = "uint";
} else if (header.substr(loc1 + 1, 1) == "f") {
typeName = "float";
}
typeName = typeName + std::to_string(word_size * 8);
}
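// For reference, a v1.0 .npy file parsed above begins with a 10-byte preamble
// "\x93NUMPY" + major + minor + HEADER_LEN (uint16, little endian), followed
// by a python dict such as:
//   {'descr': '<f4', 'fortran_order': False, 'shape': (3, 4), }
// padded with spaces to a 16-byte boundary and terminated by '\n'. For that
// example, word_size = 4 and typeName becomes "float32".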
void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset)
{
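// ZIP end-of-central-directory record layout (22 bytes when the archive
// comment is empty), as read below:
//   offset 0  signature "PK\x05\x06"    offset 4  number of this disk
//   offset 6  disk with central dir     offset 8  entries on this disk
//   offset 10 total entries             offset 12 central directory size
//   offset 16 central directory offset  offset 20 comment length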
std::vector<char> footer(22);
fseek(fp, -22, SEEK_END);
size_t res = fread(&footer[0], sizeof(char), 22, fp);
if (res != 22)
throw std::runtime_error("parse_zip_footer: failed fread");
uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
disk_no = *(uint16_t*)&footer[4];
disk_start = *(uint16_t*)&footer[6];
nrecs_on_disk = *(uint16_t*)&footer[8];
nrecs = *(uint16_t*)&footer[10];
global_header_size = *(uint32_t*)&footer[12];
global_header_offset = *(uint32_t*)&footer[16];
comment_len = *(uint16_t*)&footer[20];
assert(disk_no == 0);
assert(disk_start == 0);
assert(nrecs_on_disk == nrecs);
assert(comment_len == 0);
}
cnpy::NpyArray load_the_npy_file(FILE* fp)
{
std::vector<size_t> shape;
size_t word_size;
std::string typeName;
bool fortran_order;
cnpy::parse_npy_header(fp, word_size, shape, fortran_order, typeName);
cnpy::NpyArray arr(shape, word_size, fortran_order, typeName);
size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
if (nread != arr.num_bytes())
throw std::runtime_error("load_the_npy_file: failed fread");
return arr;
}
cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
{
std::vector<unsigned char> buffer_compr(compr_bytes);
std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
if (nread != compr_bytes)
throw std::runtime_error("load_the_npy_file: failed fread");
#if 0 // zlib is disabled in this build, so deflated entries are not actually inflated here
int err;
z_stream d_stream;
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.avail_in = 0;
d_stream.next_in = Z_NULL;
err = inflateInit2(&d_stream, -MAX_WBITS);
d_stream.avail_in = compr_bytes;
d_stream.next_in = &buffer_compr[0];
d_stream.avail_out = uncompr_bytes;
d_stream.next_out = &buffer_uncompr[0];
err = inflate(&d_stream, Z_FINISH);
err = inflateEnd(&d_stream);
#endif
std::vector<size_t> shape;
size_t word_size;
bool fortran_order;
std::string typeName;
cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order, typeName);
cnpy::NpyArray array(shape, word_size, fortran_order, typeName);
size_t offset = uncompr_bytes - array.num_bytes();
memcpy(array.data<unsigned char>(), &buffer_uncompr[0] + offset, array.num_bytes());
return array;
}
cnpy::npz_t cnpy::npz_load(std::string fname)
{
FILE* fp = fopen(fname.c_str(), "rb");
if (!fp) {
throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
}
cnpy::npz_t arrays;
while (1) {
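// ZIP local file header layout (30-byte fixed part) used below:
//   offset 0  signature "PK\x03\x04"    offset 8  compression method
//   offset 18 compressed size           offset 22 uncompressed size
//   offset 26 file name length          offset 28 extra field length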
std::vector<char> local_header(30);
size_t headerres = fread(&local_header[0], sizeof(char), 30, fp);
if (headerres != 30)
throw std::runtime_error("npz_load: failed fread");
// if we've reached the global header, stop reading
if (local_header[2] != 0x03 || local_header[3] != 0x04)
break;
// read in the variable name
uint16_t name_len = *(uint16_t*)&local_header[26];
std::string varname(name_len, ' ');
size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp);
if (vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
// erase the lagging .npy
varname.erase(varname.end() - 4, varname.end());
// read in the extra field
uint16_t extra_field_len = *(uint16_t*)&local_header[28];
if (extra_field_len > 0) {
std::vector<char> buff(extra_field_len);
size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
if (efield_res != extra_field_len)
throw std::runtime_error("npz_load: failed fread");
}
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
if (compr_method == 0) {
arrays[varname] = load_the_npy_file(fp);
} else {
arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes);
}
}
fclose(fp);
return arrays;
}
cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname)
{
FILE* fp = fopen(fname.c_str(), "rb");
if (!fp)
throw std::runtime_error("npz_load: Unable to open file " + fname);
while (1) {
std::vector<char> local_header(30);
size_t header_res = fread(&local_header[0], sizeof(char), 30, fp);
if (header_res != 30)
throw std::runtime_error("npz_load: failed fread");
// if we've reached the global header, stop reading
if (local_header[2] != 0x03 || local_header[3] != 0x04)
break;
// read in the variable name
uint16_t name_len = *(uint16_t*)&local_header[26];
std::string vname(name_len, ' ');
size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp);
if (vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
vname.erase(vname.end() - 4, vname.end()); // erase the lagging .npy
// read in the extra field
uint16_t extra_field_len = *(uint16_t*)&local_header[28];
fseek(fp, extra_field_len, SEEK_CUR); // skip past the extra field
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
if (vname == varname) {
NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes);
fclose(fp);
return array;
} else {
// skip past the stored data; use the compressed size (it equals the
// uncompressed size for stored entries)
fseek(fp, compr_bytes, SEEK_CUR);
}
}
fclose(fp);
// if we get here, we haven't found the variable in the file
throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname);
}
cnpy::NpyArray cnpy::npy_load(std::string fname)
{
FILE* fp = fopen(fname.c_str(), "rb");
if (!fp)
throw std::runtime_error("npy_load: Unable to open file " + fname);
NpyArray arr = load_the_npy_file(fp);
fclose(fp);
return arr;
}
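// Usage sketch (not part of the original file; "example.npy" is an
// illustrative file name): save a 2x3 float array with the cnpy API above
// and load it back.
static void cnpy_usage_example()
{
std::vector<float> v = {1, 2, 3, 4, 5, 6};
cnpy::npy_save("example.npy", &v[0], std::vector<size_t>{2, 3}, "w");
cnpy::NpyArray arr = cnpy::npy_load("example.npy");
float* p = arr.data<float>(); // arr.shape == {2, 3}, arr.word_size == 4
(void)p;
}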

cnpy.h

@@ -0,0 +1,321 @@
// Copyright (C) 2011 Carl Rogers
// Released under MIT License
// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#ifndef LIBCNPY_H_
#define LIBCNPY_H_
#if 0
#include <zlib.h>
#endif
#include <stdint.h>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <vector>
namespace cnpy {
struct NpyArray
{
NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order, std::string _typeName)
: shape(_shape)
, word_size(_word_size)
, fortran_order(_fortran_order)
, typeName(_typeName)
{
num_vals = 1;
for (size_t i = 0; i < shape.size(); i++)
num_vals *= shape[i];
data_holder = std::shared_ptr<std::vector<char>>(new std::vector<char>(num_vals * word_size));
}
NpyArray()
: shape(0)
, word_size(0)
, fortran_order(0)
, num_vals(0)
{}
template <typename T>
T* data()
{
return reinterpret_cast<T*>(&(*data_holder)[0]);
}
template <typename T>
const T* data() const
{
return reinterpret_cast<const T*>(&(*data_holder)[0]);
}
template <typename T>
std::vector<T> as_vec() const
{
const T* p = data<T>();
return std::vector<T>(p, p + num_vals);
}
size_t num_bytes() const { return data_holder->size(); }
std::shared_ptr<std::vector<char>> data_holder;
std::vector<size_t> shape;
size_t word_size;
bool fortran_order;
size_t num_vals;
std::string typeName;
};
using npz_t = std::map<std::string, NpyArray>;
char BigEndianTest(int size);
char map_type(const std::type_info& t);
template <typename T>
std::vector<char> create_npy_header(const std::vector<size_t>& shape);
void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
std::string& typeName);
void parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
std::string& typeName);
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);
template <typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs)
{
// append the raw bytes of rhs (assumes a little-endian host)
for (size_t byte = 0; byte < sizeof(T); byte++) {
char val = *((char*)&rhs + byte);
lhs.push_back(val);
}
return lhs;
}
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
template <typename T>
int npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w")
{
// note: do not pre-open fname with std::ofstream here; that would truncate
// an existing file and destroy the data being appended to in "a" mode
FILE* fp = NULL;
std::vector<size_t> true_data_shape; // if appending, the shape of existing + new data
if (mode == "a")
fp = fopen(fname.c_str(), "r+b");
if (fp) {
// file exists. we need to append to it. read the header, modify the array size
size_t word_size;
bool fortran_order;
std::string typeName;
parse_npy_header(fp, word_size, true_data_shape, fortran_order, typeName);
assert(!fortran_order);
if (word_size != sizeof(T)) {
std::cout << "libnpy error: " << fname << " has word size " << word_size << " but npy_save appending data sized "
<< sizeof(T) << "\n";
assert(word_size == sizeof(T));
}
if (true_data_shape.size() != shape.size()) {
std::cout << "libnpy error: npy_save attempting to append misdimensioned data to " << fname << "\n";
assert(true_data_shape.size() == shape.size());
}
for (size_t i = 1; i < shape.size(); i++) {
if (shape[i] != true_data_shape[i]) {
std::cout << "libnpy error: npy_save attempting to append misshaped data to " << fname << "\n";
assert(shape[i] == true_data_shape[i]);
}
}
true_data_shape[0] += shape[0];
} else {
fp = fopen(fname.c_str(), "wb");
true_data_shape = shape;
}
if (!fp) {
return -1;
}
std::vector<char> header = create_npy_header<T>(true_data_shape);
size_t nels = std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<size_t>());
fseek(fp, 0, SEEK_SET);
fwrite(&header[0], sizeof(char), header.size(), fp);
fseek(fp, 0, SEEK_END);
fwrite(data, sizeof(T), nels, fp);
fclose(fp);
return 0;
}
template <typename T>
void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape,
std::string mode = "w")
{
// first, append a .npy to the fname
fname += ".npy";
// now, on with the show
FILE* fp = NULL;
uint16_t nrecs = 0;
size_t global_header_offset = 0;
std::vector<char> global_header;
if (mode == "a")
fp = fopen(zipname.c_str(), "r+b");
if (fp) {
// zip file exists. we need to add a new npy file to it.
// first read the footer. this gives us the offset and size of the global header
// then read and store the global header.
// below, we will write the new data at the start of the global header, then append the
// global header and footer after it
size_t global_header_size;
parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
fseek(fp, global_header_offset, SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
if (res != global_header_size) {
throw std::runtime_error("npz_save: header read error while adding to existing zip");
}
fseek(fp, global_header_offset, SEEK_SET);
} else {
fp = fopen(zipname.c_str(), "wb");
}
std::vector<char> npy_header = create_npy_header<T>(shape);
size_t nels = std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<size_t>());
size_t nbytes = nels * sizeof(T) + npy_header.size();
#if 0
// get the CRC of the data to be added
uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
crc = crc32(crc, (uint8_t*)data, nels * sizeof(T));
#else
uint32_t crc = 0; // zlib disabled: a zero CRC is written, so strict unzip tools may reject the archive
#endif
// build the local header
std::vector<char> local_header;
local_header += "PK"; // first part of sig
local_header += (uint16_t)0x0403; // second part of sig
local_header += (uint16_t)20; // min version to extract
local_header += (uint16_t)0; // general purpose bit flag
local_header += (uint16_t)0; // compression method
local_header += (uint16_t)0; // file last mod time
local_header += (uint16_t)0; // file last mod date
local_header += (uint32_t)crc; // crc
local_header += (uint32_t)nbytes; // compressed size
local_header += (uint32_t)nbytes; // uncompressed size
local_header += (uint16_t)fname.size(); // fname length
local_header += (uint16_t)0; // extra field length
local_header += fname;
// build global header
global_header += "PK"; // first part of sig
global_header += (uint16_t)0x0201; // second part of sig
global_header += (uint16_t)20; // version made by
global_header.insert(global_header.end(), local_header.begin() + 4, local_header.begin() + 30);
global_header += (uint16_t)0; // file comment length
global_header += (uint16_t)0; // disk number where file starts
global_header += (uint16_t)0; // internal file attributes
global_header += (uint32_t)0; // external file attributes
global_header += (uint32_t)
global_header_offset; // relative offset of local file header, since it begins where the global header used to begin
global_header += fname;
// build footer
std::vector<char> footer;
footer += "PK"; // first part of sig
footer += (uint16_t)0x0605; // second part of sig
footer += (uint16_t)0; // number of this disk
footer += (uint16_t)0; // disk where footer starts
footer += (uint16_t)(nrecs + 1); // number of records on this disk
footer += (uint16_t)(nrecs + 1); // total number of records
footer += (uint32_t)global_header.size(); // nbytes of global headers
footer +=
(uint32_t)(global_header_offset + nbytes + local_header.size()); // offset of start of global headers, since global
// header now starts after newly written array
footer += (uint16_t)0; // zip file comment length
// write everything
fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
fwrite(data, sizeof(T), nels, fp);
fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
fwrite(&footer[0], sizeof(char), footer.size(), fp);
fclose(fp);
}
template <typename T>
void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w")
{
std::vector<size_t> shape;
shape.push_back(data.size());
npy_save(fname, &data[0], shape, mode);
}
template <typename T>
void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w")
{
std::vector<size_t> shape;
shape.push_back(data.size());
npz_save(zipname, fname, &data[0], shape, mode);
}
template <typename T>
std::vector<char> create_npy_header(const std::vector<size_t>& shape)
{
const char* type_name = typeid(T).name();
std::vector<char> dict;
dict += "{'descr': '";
dict += BigEndianTest(sizeof(T));
if (std::string(type_name) == "N4rknn7float16E") { // Itanium-mangled name of rknn::float16
dict += "f";
} else {
dict += map_type(typeid(T));
}
dict += std::to_string(sizeof(T));
dict += "', 'fortran_order': False, 'shape': (";
dict += std::to_string(shape[0]);
for (size_t i = 1; i < shape.size(); i++) {
dict += ", ";
dict += std::to_string(shape[i]);
}
if (shape.size() == 1)
dict += ",";
dict += "), }";
// pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
int remainder = 16 - (10 + dict.size()) % 16;
dict.insert(dict.end(), remainder, ' ');
dict.back() = '\n';
std::vector<char> header;
header += (char)0x93;
header += "NUMPY";
header += (char)0x01; // major version of numpy format
header += (char)0x00; // minor version of numpy format
header += (uint16_t)dict.size();
header.insert(header.end(), dict.begin(), dict.end());
return header;
}
} // namespace cnpy
#endif

mpp_buffer.h

@@ -0,0 +1,327 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_BUFFER_H__
#define __MPP_BUFFER_H__
#include "rk_type.h"
#include "mpp_err.h"
/*
* MppBuffer module has several functions:
*
* 1. buffer get / put / reference management / external commit / get info.
* this part is the basic user interface for MppBuffer.
*
* function:
*
* mpp_buffer_get
* mpp_buffer_put
* mpp_buffer_inc_ref
* mpp_buffer_commit
* mpp_buffer_info_get
*
* 2. user buffer working flow control abstraction.
* a buffer should be attached to a certain group, and the buffer mode controls the buffer usage flow.
* this part is also a part of user interface.
*
* function:
*
* mpp_buffer_group_get
* mpp_buffer_group_normal_get
* mpp_buffer_group_limit_get
* mpp_buffer_group_put
* mpp_buffer_group_limit_config
*
* 3. buffer allocator management
* this part is for the allocators on different OSes; it has no user interface.
* it will support normal buffers, Android ion buffers and Linux v4l2 vb2 buffers;
* users can only choose via MppBufferType.
*
*/
/*
* mpp buffer group supports two work flow modes:
*
* normal flow: all buffers are generated by MPP
* under this mode, buffer pool is maintained internally
*
* typical call flow:
*
* mpp_buffer_group_get() return A
* mpp_buffer_get(A) return a ref +1 -> used
* mpp_buffer_inc_ref(a) ref +1
* mpp_buffer_put(a) ref -1
* mpp_buffer_put(a) ref -1 -> unused
* mpp_buffer_group_put(A)
*
* commit flow: all buffers are committed from outside of MPP
* under this mode, buffers are committed by an external api.
* normally MPP only uses them but does not allocate them.
*
* typical call flow:
*
* ==== external allocator ====
* mpp_buffer_group_get() return A
* mpp_buffer_commit(A, x)
* mpp_buffer_commit(A, y)
*
* ======= internal user ======
* mpp_buffer_get(A) return a
* mpp_buffer_get(A) return b
* mpp_buffer_put(a)
* mpp_buffer_put(b)
*
* ==== external allocator ====
* mpp_buffer_group_put(A)
*
* NOTE: the commit interface requires a group handle to record group information
*/
/*
* mpp buffer group has two buffer limit modes: normal and limit
*
* normal mode: allows any buffer size and always generates a new buffer if no unused buffer
* is available.
* This mode is normally used with the normal flow, for table / stream buffers
*
* limit mode : restricts the buffer size and count in the buffer group. Trying to allocate a
* buffer with a different size or beyond the count limit will fail.
* This mode is normally used with the commit flow, for frame buffers
*/
/*
* NOTE: normal mode is recommended for the normal flow; pairing it with the commit flow is not.
* limit mode is recommended for the commit flow; pairing it with the normal flow is not.
*/
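/*
 * Minimal normal-flow sketch (error handling omitted; MODULE_TAG must be
 * defined by the includer, and SZ_1M is assumed from the platform headers):
 *
 *     MppBufferGroup group = NULL;
 *     MppBuffer buffer = NULL;
 *     mpp_buffer_group_get_internal(&group, MPP_BUFFER_TYPE_ION);
 *     mpp_buffer_get(group, &buffer, SZ_1M);  // ref +1 -> used
 *     mpp_buffer_put(buffer);                 // ref -1 -> unused
 *     mpp_buffer_group_put(group);
 */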
typedef enum {
MPP_BUFFER_INTERNAL,
MPP_BUFFER_EXTERNAL,
MPP_BUFFER_MODE_BUTT,
} MppBufferMode;
/*
* the mpp buffer has several types:
*
* normal : normal malloc buffer for unit test or hardware simulation
* ion : use the ion device under Android/Linux; MppBuffer will encapsulate the ion file handle
* ext_dma : DMABUF (DMA buffers) coming from the application
* drm : use the drm device interface for memory management
*/
typedef enum {
MPP_BUFFER_TYPE_NORMAL,
MPP_BUFFER_TYPE_ION,
MPP_BUFFER_TYPE_EXT_DMA,
MPP_BUFFER_TYPE_DRM,
MPP_BUFFER_TYPE_DMA_HEAP,
MPP_BUFFER_TYPE_BUTT,
} MppBufferType;
#define MPP_BUFFER_TYPE_MASK 0x0000FFFF
/*
* MPP_BUFFER_FLAGS cooperate with MppBufferType
* 16 high bits of MppBufferType are used in flags
*
* eg:
* DRM CMA buffer : MPP_BUFFER_TYPE_DRM | MPP_BUFFER_FLAGS_CONTIG
* = 0x00010003
* DRM SECURE buffer: MPP_BUFFER_TYPE_DRM | MPP_BUFFER_FLAGS_SECURE
* = 0x00080003
*
* The dma buffer source can also be set by format: flags | type.
* dma buffer source flags:
* MPP_BUFFER_FLAGS_CONTIG means cma
* MPP_BUFFER_FLAGS_CACHABLE means cachable
* MPP_BUFFER_FLAGS_DMA32 means dma32
*
* flags originate from drm_rockchip_gem_mem_type
*/
#define MPP_BUFFER_FLAGS_MASK 0x003f0000 //ROCKCHIP_BO_MASK << 16
#define MPP_BUFFER_FLAGS_CONTIG 0x00010000 //ROCKCHIP_BO_CONTIG << 16
#define MPP_BUFFER_FLAGS_CACHABLE 0x00020000 //ROCKCHIP_BO_CACHABLE << 16
#define MPP_BUFFER_FLAGS_WC 0x00040000 //ROCKCHIP_BO_WC << 16
#define MPP_BUFFER_FLAGS_SECURE 0x00080000 //ROCKCHIP_BO_SECURE << 16
#define MPP_BUFFER_FLAGS_ALLOC_KMAP 0x00100000 //ROCKCHIP_BO_ALLOC_KMAP << 16
#define MPP_BUFFER_FLAGS_DMA32 0x00200000 //ROCKCHIP_BO_DMA32 << 16
/*
* MppBufferInfo variable's meaning is different in different MppBufferType
*
* Common
* index - the buffer index used to track buffer in buffer pool
* size - the buffer size
*
* MPP_BUFFER_TYPE_NORMAL
*
* ptr - virtual address of normal malloced buffer
* fd - unused and set to -1, the allocator would return its
* internal buffer counter number
*
* MPP_BUFFER_TYPE_ION
*
* ptr - virtual address of ion buffer in user space
* hnd - ion handle in user space
* fd - ion buffer file handle for map / unmap
*
*/
typedef struct MppBufferInfo_t {
MppBufferType type;
size_t size;
void *ptr;
void *hnd;
int fd;
int index;
} MppBufferInfo;
#define BUFFER_GROUP_SIZE_DEFAULT (SZ_1M*80)
/*
* mpp_buffer_import_with_tag(MppBufferGroup group, MppBufferInfo *info, MppBuffer *buffer)
*
* 1. group - specifies the group the MppBuffer will be attached to.
* group can be NULL; the buffer is then attached to the default legacy group.
* Defaults to NULL in the mpp_buffer_import case
*
* 2. info - input information for the output MppBuffer
* info can NOT be NULL. It must contain at least one of ptr/fd.
*
* 3. buffer - MppBuffer generated from MppBufferInfo.
* buffer can be NULL; the buffer is then committed to the group with unused status.
* Otherwise the generated buffer is returned directly with its ref_count increased.
* Defaults to NULL in the mpp_buffer_commit case
*
* mpp_buffer_commit usage:
*
* Adds an external buffer info to a group. The buffer will be in unused status.
* Typical usage is on Android: MediaPlayer grallocs graphic buffers and commits them
* to the decoder's buffer group; the decoder recycles these buffers and returns buffer
* references to MediaPlayer for display.
*
* mpp_buffer_import usage:
*
* Transfers an external buffer info to a MppBuffer that is not expected to be attached
* to a certain buffer group, so the group is set to NULL. The buffer can then be used
* for MppFrame/MppPacket. Typical usage is image processing, which is normally a
* one-shot operation and does not need complicated group management. On the other hand,
* mpp still needs to know whether the imported buffer leaks and to trace its usage
* inside the mpp process. So we attach this kind of buffer to the default misc buffer
* group for management.
*/
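/*
 * Commit-flow sketch (ext_fd / ext_size are illustrative values assumed to
 * come from an external allocator):
 *
 *     MppBufferInfo info;
 *     memset(&info, 0, sizeof(info));
 *     info.type = MPP_BUFFER_TYPE_EXT_DMA;
 *     info.fd   = ext_fd;
 *     info.size = ext_size;
 *     mpp_buffer_commit(group, &info);   // buffer enters the group as unused
 */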
#define mpp_buffer_commit(group, info) \
mpp_buffer_import_with_tag(group, info, NULL, MODULE_TAG, __FUNCTION__)
#define mpp_buffer_import(buffer, info) \
mpp_buffer_import_with_tag(NULL, info, buffer, MODULE_TAG, __FUNCTION__)
#define mpp_buffer_get(group, buffer, size) \
mpp_buffer_get_with_tag(group, buffer, size, MODULE_TAG, __FUNCTION__)
#define mpp_buffer_put(buffer) \
mpp_buffer_put_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_inc_ref(buffer) \
mpp_buffer_inc_ref_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_info_get(buffer, info) \
mpp_buffer_info_get_with_caller(buffer, info, __FUNCTION__)
#define mpp_buffer_read(buffer, offset, data, size) \
mpp_buffer_read_with_caller(buffer, offset, data, size, __FUNCTION__)
#define mpp_buffer_write(buffer, offset, data, size) \
mpp_buffer_write_with_caller(buffer, offset, data, size, __FUNCTION__)
#define mpp_buffer_get_ptr(buffer) \
mpp_buffer_get_ptr_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_get_fd(buffer) \
mpp_buffer_get_fd_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_get_size(buffer) \
mpp_buffer_get_size_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_get_index(buffer) \
mpp_buffer_get_index_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_set_index(buffer, index) \
mpp_buffer_set_index_with_caller(buffer, index, __FUNCTION__)
#define mpp_buffer_get_offset(buffer) \
mpp_buffer_get_offset_with_caller(buffer, __FUNCTION__)
#define mpp_buffer_set_offset(buffer, offset) \
mpp_buffer_set_offset_with_caller(buffer, offset, __FUNCTION__)
#define mpp_buffer_group_get_internal(group, type, ...) \
mpp_buffer_group_get(group, type, MPP_BUFFER_INTERNAL, MODULE_TAG, __FUNCTION__)
#define mpp_buffer_group_get_external(group, type, ...) \
mpp_buffer_group_get(group, type, MPP_BUFFER_EXTERNAL, MODULE_TAG, __FUNCTION__)
#ifdef __cplusplus
extern "C" {
#endif
/*
* MppBuffer interface
* these interfaces will change the values of group and buffer, so the parameters
* need to be checked before calling these functions.
*
* IMPORTANT:
* mpp_buffer_import_with_tag - combined interface for commit and import
*
*/
MPP_RET mpp_buffer_import_with_tag(MppBufferGroup group, MppBufferInfo *info, MppBuffer *buffer,
const char *tag, const char *caller);
MPP_RET mpp_buffer_get_with_tag(MppBufferGroup group, MppBuffer *buffer, size_t size,
const char *tag, const char *caller);
MPP_RET mpp_buffer_put_with_caller(MppBuffer buffer, const char *caller);
MPP_RET mpp_buffer_inc_ref_with_caller(MppBuffer buffer, const char *caller);
MPP_RET mpp_buffer_info_get_with_caller(MppBuffer buffer, MppBufferInfo *info, const char *caller);
MPP_RET mpp_buffer_read_with_caller(MppBuffer buffer, size_t offset, void *data, size_t size, const char *caller);
MPP_RET mpp_buffer_write_with_caller(MppBuffer buffer, size_t offset, void *data, size_t size, const char *caller);
void *mpp_buffer_get_ptr_with_caller(MppBuffer buffer, const char *caller);
int mpp_buffer_get_fd_with_caller(MppBuffer buffer, const char *caller);
size_t mpp_buffer_get_size_with_caller(MppBuffer buffer, const char *caller);
int mpp_buffer_get_index_with_caller(MppBuffer buffer, const char *caller);
MPP_RET mpp_buffer_set_index_with_caller(MppBuffer buffer, int index, const char *caller);
size_t mpp_buffer_get_offset_with_caller(MppBuffer buffer, const char *caller);
MPP_RET mpp_buffer_set_offset_with_caller(MppBuffer buffer, size_t offset, const char *caller);
MPP_RET mpp_buffer_group_get(MppBufferGroup *group, MppBufferType type, MppBufferMode mode,
const char *tag, const char *caller);
MPP_RET mpp_buffer_group_put(MppBufferGroup group);
MPP_RET mpp_buffer_group_clear(MppBufferGroup group);
RK_S32 mpp_buffer_group_unused(MppBufferGroup group);
size_t mpp_buffer_group_usage(MppBufferGroup group);
MppBufferMode mpp_buffer_group_mode(MppBufferGroup group);
MppBufferType mpp_buffer_group_type(MppBufferGroup group);
/*
* size : 0 - no limit, other - max buffer size
* count : 0 - no limit, other - max buffer count
*/
MPP_RET mpp_buffer_group_limit_config(MppBufferGroup group, size_t size, RK_S32 count);
RK_U32 mpp_buffer_total_now();
RK_U32 mpp_buffer_total_max();
#ifdef __cplusplus
}
#endif
#endif /*__MPP_BUFFER_H__*/

mpp_compat.h

@@ -0,0 +1,62 @@
/*
* Copyright 2021 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_COMPAT_H__
#define __MPP_COMPAT_H__
#include "rk_type.h"
#include "mpp_err.h"
typedef enum MppCompatId_e {
MPP_COMPAT_INC_FBC_BUF_SIZE,
MPP_COMPAT_ENC_ASYNC_INPUT,
MPP_COMPAT_DEC_FBC_HDR_256_ODD,
MPP_COMPAT_BUTT,
} MppCompatId;
typedef enum MppCompatType_e {
MPP_COMPAT_BOOL,
MPP_COMPAT_S32,
MPP_COMPAT_TYPE_BUTT,
} MppCompatType;
typedef struct MppCompat_t MppCompat;
/* external users can only update value_usr to notify mpp to change its behavior */
struct MppCompat_t {
const MppCompatId feature_id;
const MppCompatType feature_type;
const RK_S32 value_mpp;
RK_S32 value_usr;
const char *name;
MppCompat * const next;
};
#ifdef __cplusplus
extern "C" {
#endif
MppCompat *mpp_compat_query(void);
MppCompat *mpp_compat_query_by_id(MppCompatId id);
MPP_RET mpp_compat_update(MppCompat *compat, RK_S32 value);
void mpp_compat_show(void);
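/*
 * Usage sketch (the value 1 is illustrative): enable one compatibility switch.
 *
 *     MppCompat *compat = mpp_compat_query_by_id(MPP_COMPAT_INC_FBC_BUF_SIZE);
 *     if (compat)
 *         mpp_compat_update(compat, 1);
 */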
#ifdef __cplusplus
}
#endif
#endif /*__MPP_COMPAT_H__*/

mpp_err.h

@@ -0,0 +1,54 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_ERR_H__
#define __MPP_ERR_H__
#define RK_OK 0
#define RK_SUCCESS 0
typedef enum {
MPP_SUCCESS = RK_SUCCESS,
MPP_OK = RK_OK,
MPP_NOK = -1,
MPP_ERR_UNKNOW = -2,
MPP_ERR_NULL_PTR = -3,
MPP_ERR_MALLOC = -4,
MPP_ERR_OPEN_FILE = -5,
MPP_ERR_VALUE = -6,
MPP_ERR_READ_BIT = -7,
MPP_ERR_TIMEOUT = -8,
MPP_ERR_PERM = -9,
MPP_ERR_BASE = -1000,
/* The error in stream processing */
MPP_ERR_LIST_STREAM = MPP_ERR_BASE - 1,
MPP_ERR_INIT = MPP_ERR_BASE - 2,
MPP_ERR_VPU_CODEC_INIT = MPP_ERR_BASE - 3,
MPP_ERR_STREAM = MPP_ERR_BASE - 4,
MPP_ERR_FATAL_THREAD = MPP_ERR_BASE - 5,
MPP_ERR_NOMEM = MPP_ERR_BASE - 6,
MPP_ERR_PROTOL = MPP_ERR_BASE - 7,
MPP_FAIL_SPLIT_FRAME = MPP_ERR_BASE - 8,
MPP_ERR_VPUHW = MPP_ERR_BASE - 9,
MPP_EOS_STREAM_REACHED = MPP_ERR_BASE - 11,
MPP_ERR_BUFFER_FULL = MPP_ERR_BASE - 12,
MPP_ERR_DISPLAY_FULL = MPP_ERR_BASE - 13,
} MPP_RET;
#endif /*__MPP_ERR_H__*/

mpp_frame.h

@@ -0,0 +1,432 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_FRAME_H__
#define __MPP_FRAME_H__
#include "mpp_buffer.h"
#include "mpp_meta.h"
/*
* bit definition for mode flag in MppFrame
*/
/* progressive frame */
#define MPP_FRAME_FLAG_FRAME (0x00000000)
/* top field only */
#define MPP_FRAME_FLAG_TOP_FIELD (0x00000001)
/* bottom field only */
#define MPP_FRAME_FLAG_BOT_FIELD (0x00000002)
/* paired field */
#define MPP_FRAME_FLAG_PAIRED_FIELD (MPP_FRAME_FLAG_TOP_FIELD|MPP_FRAME_FLAG_BOT_FIELD)
/* paired field with field order of top first */
#define MPP_FRAME_FLAG_TOP_FIRST (0x00000004)
/* paired field with field order of bottom first */
#define MPP_FRAME_FLAG_BOT_FIRST (0x00000008)
/* paired field with unknown field order (MBAFF) */
#define MPP_FRAME_FLAG_DEINTERLACED (MPP_FRAME_FLAG_TOP_FIRST|MPP_FRAME_FLAG_BOT_FIRST)
#define MPP_FRAME_FLAG_FIELD_ORDER_MASK (0x0000000C)
// for multiview stream
#define MPP_FRAME_FLAG_VIEW_ID_MASK (0x000000f0)
#define MPP_FRAME_FLAG_IEP_DEI_MASK (0x00000f00)
#define MPP_FRAME_FLAG_IEP_DEI_I2O1 (0x00000100)
#define MPP_FRAME_FLAG_IEP_DEI_I4O2 (0x00000200)
#define MPP_FRAME_FLAG_IEP_DEI_I4O1 (0x00000300)
/*
* MPEG vs JPEG YUV range.
*/
typedef enum {
MPP_FRAME_RANGE_UNSPECIFIED = 0,
MPP_FRAME_RANGE_MPEG = 1, ///< the normal 219*2^(n-8) "MPEG" YUV ranges
MPP_FRAME_RANGE_JPEG = 2, ///< the normal 2^n-1 "JPEG" YUV ranges
MPP_FRAME_RANGE_NB, ///< Not part of ABI
} MppFrameColorRange;
typedef enum {
MPP_FRAME_VIDEO_FMT_COMPONEMT = 0,
MPP_FRAME_VIDEO_FMT_PAL = 1,
MPP_FRAME_VIDEO_FMT_NTSC = 2,
MPP_FRAME_VIDEO_FMT_SECAM = 3,
MPP_FRAME_VIDEO_FMT_MAC = 4,
MPP_FRAME_VIDEO_FMT_UNSPECIFIED = 5,
MPP_FRAME_VIDEO_FMT_RESERVED0 = 6,
MPP_FRAME_VIDEO_FMT_RESERVED1 = 7,
} MppFrameVideoFormat;
/*
* Chromaticity coordinates of the source primaries.
*/
typedef enum {
MPP_FRAME_PRI_RESERVED0 = 0,
MPP_FRAME_PRI_BT709 = 1, ///< also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B
MPP_FRAME_PRI_UNSPECIFIED = 2,
MPP_FRAME_PRI_RESERVED = 3,
MPP_FRAME_PRI_BT470M = 4, ///< also FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
MPP_FRAME_PRI_BT470BG = 5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
MPP_FRAME_PRI_SMPTE170M = 6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC/SMPTE ST 170 (2004)
MPP_FRAME_PRI_SMPTE240M = 7, ///< functionally identical to above/SMPTE ST 240
MPP_FRAME_PRI_FILM = 8, ///< colour filters using Illuminant C
MPP_FRAME_PRI_BT2020 = 9, ///< ITU-R BT2020 / ITU-R BT.2100-2
MPP_FRAME_PRI_SMPTEST428_1 = 10, ///< SMPTE ST 428-1 (CIE 1931 XYZ)
MPP_FRAME_PRI_SMPTE431 = 11, ///< SMPTE ST 431-2 (2011) / DCI P3
MPP_FRAME_PRI_SMPTE432 = 12, ///< SMPTE ST 432-1 (2010) / P3 D65 / Display P3
MPP_FRAME_PRI_JEDEC_P22 = 22, ///< JEDEC P22 phosphors
MPP_FRAME_PRI_NB, ///< Not part of ABI
} MppFrameColorPrimaries;
/*
* Color Transfer Characteristic.
*/
typedef enum {
MPP_FRAME_TRC_RESERVED0 = 0,
MPP_FRAME_TRC_BT709 = 1, ///< also ITU-R BT1361
MPP_FRAME_TRC_UNSPECIFIED = 2,
MPP_FRAME_TRC_RESERVED = 3,
MPP_FRAME_TRC_GAMMA22 = 4, ///< also ITU-R BT470M / ITU-R BT1700 625 PAL & SECAM
MPP_FRAME_TRC_GAMMA28 = 5, ///< also ITU-R BT470BG
MPP_FRAME_TRC_SMPTE170M = 6, ///< also ITU-R BT601-6 525 or 625 / ITU-R BT1358 525 or 625 / ITU-R BT1700 NTSC
MPP_FRAME_TRC_SMPTE240M = 7,
MPP_FRAME_TRC_LINEAR = 8, ///< "Linear transfer characteristics"
MPP_FRAME_TRC_LOG = 9, ///< "Logarithmic transfer characteristic (100:1 range)"
MPP_FRAME_TRC_LOG_SQRT = 10, ///< "Logarithmic transfer characteristic (100 * Sqrt(10) : 1 range)"
MPP_FRAME_TRC_IEC61966_2_4 = 11, ///< IEC 61966-2-4
MPP_FRAME_TRC_BT1361_ECG = 12, ///< ITU-R BT1361 Extended Colour Gamut
MPP_FRAME_TRC_IEC61966_2_1 = 13, ///< IEC 61966-2-1 (sRGB or sYCC)
MPP_FRAME_TRC_BT2020_10 = 14, ///< ITU-R BT2020 for 10 bit system
MPP_FRAME_TRC_BT2020_12 = 15, ///< ITU-R BT2020 for 12 bit system
MPP_FRAME_TRC_SMPTEST2084 = 16, ///< SMPTE ST 2084 for 10-, 12-, 14- and 16-bit systems
MPP_FRAME_TRC_SMPTEST428_1 = 17, ///< SMPTE ST 428-1
MPP_FRAME_TRC_ARIB_STD_B67 = 18, ///< ARIB STD-B67, known as "Hybrid log-gamma"
MPP_FRAME_TRC_NB, ///< Not part of ABI
} MppFrameColorTransferCharacteristic;
/*
* YUV colorspace type.
*/
typedef enum {
MPP_FRAME_SPC_RGB = 0, ///< order of coefficients is actually GBR, also IEC 61966-2-1 (sRGB)
MPP_FRAME_SPC_BT709 = 1, ///< also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B
MPP_FRAME_SPC_UNSPECIFIED = 2,
MPP_FRAME_SPC_RESERVED = 3,
MPP_FRAME_SPC_FCC = 4, ///< FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
MPP_FRAME_SPC_BT470BG = 5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM / IEC 61966-2-4 xvYCC601
MPP_FRAME_SPC_SMPTE170M = 6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC / functionally identical to above
MPP_FRAME_SPC_SMPTE240M = 7,
MPP_FRAME_SPC_YCOCG = 8, ///< Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16
MPP_FRAME_SPC_BT2020_NCL = 9, ///< ITU-R BT2020 non-constant luminance system
MPP_FRAME_SPC_BT2020_CL = 10, ///< ITU-R BT2020 constant luminance system
MPP_FRAME_SPC_SMPTE2085 = 11, ///< SMPTE 2085, Y'D'zD'x
MPP_FRAME_SPC_CHROMA_DERIVED_NCL = 12, ///< Chromaticity-derived non-constant luminance system
MPP_FRAME_SPC_CHROMA_DERIVED_CL = 13, ///< Chromaticity-derived constant luminance system
MPP_FRAME_SPC_ICTCP = 14, ///< ITU-R BT.2100-0, ICtCp
MPP_FRAME_SPC_NB, ///< Not part of ABI
} MppFrameColorSpace;
/*
* Location of chroma samples.
*
* Illustration showing the location of the first (top left) chroma sample of the
* image, the left shows only luma, the right
* shows the location of the chroma sample, the 2 could be imagined to overlay
* each other but are drawn separately due to limitations of ASCII
*
* 1st 2nd 1st 2nd horizontal luma sample positions
* v v v v
* ______ ______
*1st luma line > |X X ... |3 4 X ... X are luma samples,
* | |1 2 1-6 are possible chroma positions
*2nd luma line > |X X ... |5 6 X ... 0 is undefined/unknown position
*/
typedef enum {
MPP_CHROMA_LOC_UNSPECIFIED = 0,
MPP_CHROMA_LOC_LEFT = 1, ///< mpeg2/4 4:2:0, h264 default for 4:2:0
MPP_CHROMA_LOC_CENTER = 2, ///< mpeg1 4:2:0, jpeg 4:2:0, h263 4:2:0
MPP_CHROMA_LOC_TOPLEFT = 3, ///< ITU-R 601, SMPTE 274M 296M S314M(DV 4:1:1), mpeg2 4:2:2
MPP_CHROMA_LOC_TOP = 4,
MPP_CHROMA_LOC_BOTTOMLEFT = 5,
MPP_CHROMA_LOC_BOTTOM = 6,
MPP_CHROMA_LOC_NB, ///< Not part of ABI
} MppFrameChromaLocation;
#define MPP_FRAME_FMT_MASK (0x000fffff)
#define MPP_FRAME_FMT_COLOR_MASK (0x000f0000)
#define MPP_FRAME_FMT_YUV (0x00000000)
#define MPP_FRAME_FMT_RGB (0x00010000)
#define MPP_FRAME_FBC_MASK (0x00f00000)
#define MPP_FRAME_FBC_NONE (0x00000000)
#define MPP_FRAME_HDR_MASK (0x0f000000)
#define MPP_FRAME_HDR_NONE (0x00000000)
#define MPP_FRAME_HDR (0x01000000)
/*
* AFBC_V1 is for ISP output.
* It has a default payload offset calculated from width and height:
* Payload offset = MPP_ALIGN(MPP_ALIGN(width, 16) * MPP_ALIGN(height, 16) / 16, SZ_4K)
*/
#define MPP_FRAME_FBC_AFBC_V1 (0x00100000)
/*
* AFBC_V2 is for video decoder output.
* It stores the payload offset in the first 32 bits at the header address.
* Payload offset is always set to zero.
*/
#define MPP_FRAME_FBC_AFBC_V2 (0x00200000)
#define MPP_FRAME_FMT_LE_MASK (0x01000000)
#define MPP_FRAME_FMT_IS_YUV(fmt) (((fmt & MPP_FRAME_FMT_COLOR_MASK) == MPP_FRAME_FMT_YUV) && \
((fmt & MPP_FRAME_FMT_MASK) < MPP_FMT_YUV_BUTT))
#define MPP_FRAME_FMT_IS_YUV_10BIT(fmt) ((fmt & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV420SP_10BIT || \
(fmt & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV422SP_10BIT)
#define MPP_FRAME_FMT_IS_RGB(fmt) (((fmt & MPP_FRAME_FMT_COLOR_MASK) == MPP_FRAME_FMT_RGB) && \
((fmt & MPP_FRAME_FMT_MASK) < MPP_FMT_RGB_BUTT))
/*
* For MPP_FRAME_FBC_AFBC_V1 the 16byte aligned stride is used.
*/
#define MPP_FRAME_FMT_IS_FBC(fmt) (fmt & MPP_FRAME_FBC_MASK)
#define MPP_FRAME_FMT_IS_HDR(fmt) (fmt & MPP_FRAME_HDR_MASK)
#define MPP_FRAME_FMT_IS_LE(fmt) ((fmt & MPP_FRAME_FMT_LE_MASK) == MPP_FRAME_FMT_LE_MASK)
#define MPP_FRAME_FMT_IS_BE(fmt) ((fmt & MPP_FRAME_FMT_LE_MASK) == 0)
/* mpp color format index definition */
typedef enum {
MPP_FMT_YUV420SP = (MPP_FRAME_FMT_YUV + 0), /* YYYY... UV... (NV12) */
/*
* A rockchip specific pixel format without gaps between pixels, as opposed
* to the P010_10LE/P010_10BE layouts
*/
MPP_FMT_YUV420SP_10BIT = (MPP_FRAME_FMT_YUV + 1),
MPP_FMT_YUV422SP = (MPP_FRAME_FMT_YUV + 2), /* YYYY... UVUV... (NV16) */
MPP_FMT_YUV422SP_10BIT = (MPP_FRAME_FMT_YUV + 3), ///< Not part of ABI
MPP_FMT_YUV420P = (MPP_FRAME_FMT_YUV + 4), /* YYYY... U...V... (I420) */
MPP_FMT_YUV420SP_VU = (MPP_FRAME_FMT_YUV + 5), /* YYYY... VUVUVU... (NV21) */
MPP_FMT_YUV422P = (MPP_FRAME_FMT_YUV + 6), /* YYYY... UU...VV...(422P) */
MPP_FMT_YUV422SP_VU = (MPP_FRAME_FMT_YUV + 7), /* YYYY... VUVUVU... (NV61) */
MPP_FMT_YUV422_YUYV = (MPP_FRAME_FMT_YUV + 8), /* YUYVYUYV... (YUY2) */
MPP_FMT_YUV422_YVYU = (MPP_FRAME_FMT_YUV + 9), /* YVYUYVYU... (YVY2) */
MPP_FMT_YUV422_UYVY = (MPP_FRAME_FMT_YUV + 10), /* UYVYUYVY... (UYVY) */
MPP_FMT_YUV422_VYUY = (MPP_FRAME_FMT_YUV + 11), /* VYUYVYUY... (VYUY) */
MPP_FMT_YUV400 = (MPP_FRAME_FMT_YUV + 12), /* YYYY... */
MPP_FMT_YUV440SP = (MPP_FRAME_FMT_YUV + 13), /* YYYY... UVUV... */
MPP_FMT_YUV411SP = (MPP_FRAME_FMT_YUV + 14), /* YYYY... UV... */
MPP_FMT_YUV444SP = (MPP_FRAME_FMT_YUV + 15), /* YYYY... UVUVUVUV... */
MPP_FMT_YUV444P = (MPP_FRAME_FMT_YUV + 16), /* YYYY... UUUU... VVVV... */
MPP_FMT_YUV_BUTT,
MPP_FMT_RGB565 = (MPP_FRAME_FMT_RGB + 0), /* 16-bit RGB */
MPP_FMT_BGR565 = (MPP_FRAME_FMT_RGB + 1), /* 16-bit RGB */
MPP_FMT_RGB555 = (MPP_FRAME_FMT_RGB + 2), /* 15-bit RGB */
MPP_FMT_BGR555 = (MPP_FRAME_FMT_RGB + 3), /* 15-bit RGB */
MPP_FMT_RGB444 = (MPP_FRAME_FMT_RGB + 4), /* 12-bit RGB */
MPP_FMT_BGR444 = (MPP_FRAME_FMT_RGB + 5), /* 12-bit RGB */
MPP_FMT_RGB888 = (MPP_FRAME_FMT_RGB + 6), /* 24-bit RGB */
MPP_FMT_BGR888 = (MPP_FRAME_FMT_RGB + 7), /* 24-bit RGB */
MPP_FMT_RGB101010 = (MPP_FRAME_FMT_RGB + 8), /* 30-bit RGB */
MPP_FMT_BGR101010 = (MPP_FRAME_FMT_RGB + 9), /* 30-bit RGB */
MPP_FMT_ARGB8888 = (MPP_FRAME_FMT_RGB + 10), /* 32-bit RGB */
MPP_FMT_ABGR8888 = (MPP_FRAME_FMT_RGB + 11), /* 32-bit RGB */
MPP_FMT_BGRA8888 = (MPP_FRAME_FMT_RGB + 12), /* 32-bit RGB */
MPP_FMT_RGBA8888 = (MPP_FRAME_FMT_RGB + 13), /* 32-bit RGB */
MPP_FMT_RGB_BUTT,
MPP_FMT_BUTT,
} MppFrameFormat;
/**
* Rational number (pair of numerator and denominator).
*/
typedef struct MppFrameRational {
RK_S32 num; ///< Numerator
RK_S32 den; ///< Denominator
} MppFrameRational;
typedef struct MppFrameMasteringDisplayMetadata {
RK_U16 display_primaries[3][2];
RK_U16 white_point[2];
RK_U32 max_luminance;
RK_U32 min_luminance;
} MppFrameMasteringDisplayMetadata;
typedef struct MppFrameContentLightMetadata {
RK_U16 MaxCLL;
RK_U16 MaxFALL;
} MppFrameContentLightMetadata;
typedef struct MppFrameHdrDynamicMeta {
RK_U32 hdr_fmt;
RK_U32 size;
RK_U8 data[];
} MppFrameHdrDynamicMeta;
typedef enum MppFrameError {
/* General error not specified */
MPP_FRAME_ERR_UNKNOW = 0x0001,
/* Critical error for a feature the decoder does not support */
MPP_FRAME_ERR_UNSUPPORT = 0x0002,
/*
* Fatal error: the decoder can not parse a valid frame for the hardware;
* the pixel data is all invalid.
*/
MPP_FRAME_ERR_DEC_INVALID = 0x0010,
/*
* Normal error: the decoder found a hardware error while decoding.
*/
MPP_FRAME_ERR_DEC_HW_ERR = 0x0100,
/*
* Normal error: the decoder found a missing reference frame while decoding.
*/
MPP_FRAME_ERR_DEC_MISS_REF = 0x0200,
} MppFrameError;
#ifdef __cplusplus
extern "C" {
#endif
/*
* MppFrame interface
*/
MPP_RET mpp_frame_init(MppFrame *frame);
MPP_RET mpp_frame_deinit(MppFrame *frame);
/*
* normal parameter
*
* offset_x
* <-------->
*
* <---------------+ hor_stride +--------------->
*
* +------------------------------------------------------+ ^ ^
* | | | |
* | | | | offset_y
* | | | |
* | +--------------------------------+ ^ | | v
* | | | | | |
* | | | + | +
* | | | |
* | | valid data area | height | ver_stride
* | | | |
* | | | + | +
* | | | | | |
* | +--------------------------------+ v | |
* | | |
* | <----------+ width +---------> | |
* | | |
* +------------------------------------------------------+ v
*
*/
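/*
 * Example (sketch): a 1920x1080 NV12 frame decoded with 16-pixel alignment
 * typically has width 1920, height 1080, hor_stride 1920 and ver_stride 1088,
 * so the buffer needs at least hor_stride * ver_stride * 3 / 2 bytes.
 */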
RK_U32 mpp_frame_get_width(const MppFrame frame);
void mpp_frame_set_width(MppFrame frame, RK_U32 width);
RK_U32 mpp_frame_get_height(const MppFrame frame);
void mpp_frame_set_height(MppFrame frame, RK_U32 height);
RK_U32 mpp_frame_get_hor_stride(const MppFrame frame);
void mpp_frame_set_hor_stride(MppFrame frame, RK_U32 hor_stride);
RK_U32 mpp_frame_get_ver_stride(const MppFrame frame);
void mpp_frame_set_ver_stride(MppFrame frame, RK_U32 ver_stride);
void mpp_frame_set_hor_stride_pixel(MppFrame frame, RK_U32 hor_stride_pixel);
RK_U32 mpp_frame_get_hor_stride_pixel(const MppFrame frame);
void mpp_frame_set_fbc_hdr_stride(MppFrame frame, RK_U32 fbc_hdr_stride);
RK_U32 mpp_frame_get_fbc_hdr_stride(const MppFrame frame);
RK_U32 mpp_frame_get_offset_x(const MppFrame frame);
void mpp_frame_set_offset_x(MppFrame frame, RK_U32 offset_x);
RK_U32 mpp_frame_get_offset_y(const MppFrame frame);
void mpp_frame_set_offset_y(MppFrame frame, RK_U32 offset_y);
RK_U32 mpp_frame_get_mode(const MppFrame frame);
void mpp_frame_set_mode(MppFrame frame, RK_U32 mode);
RK_U32 mpp_frame_get_discard(const MppFrame frame);
void mpp_frame_set_discard(MppFrame frame, RK_U32 discard);
RK_U32 mpp_frame_get_viewid(const MppFrame frame);
void mpp_frame_set_viewid(MppFrame frame, RK_U32 viewid);
RK_U32 mpp_frame_get_poc(const MppFrame frame);
void mpp_frame_set_poc(MppFrame frame, RK_U32 poc);
RK_S64 mpp_frame_get_pts(const MppFrame frame);
void mpp_frame_set_pts(MppFrame frame, RK_S64 pts);
RK_S64 mpp_frame_get_dts(const MppFrame frame);
void mpp_frame_set_dts(MppFrame frame, RK_S64 dts);
RK_U32 mpp_frame_get_errinfo(const MppFrame frame);
void mpp_frame_set_errinfo(MppFrame frame, RK_U32 errinfo);
size_t mpp_frame_get_buf_size(const MppFrame frame);
void mpp_frame_set_buf_size(MppFrame frame, size_t buf_size);
void mpp_frame_set_thumbnail_en(MppFrame frame, RK_U32 thumbnail_en);
RK_U32 mpp_frame_get_thumbnail_en(const MppFrame frame);
/*
* flow control parameter
*/
RK_U32 mpp_frame_get_eos(const MppFrame frame);
void mpp_frame_set_eos(MppFrame frame, RK_U32 eos);
RK_U32 mpp_frame_get_info_change(const MppFrame frame);
void mpp_frame_set_info_change(MppFrame frame, RK_U32 info_change);
/*
* buffer parameter
*/
MppBuffer mpp_frame_get_buffer(const MppFrame frame);
void mpp_frame_set_buffer(MppFrame frame, MppBuffer buffer);
/*
* meta data parameter
*/
RK_S32 mpp_frame_has_meta(const MppFrame frame);
MppMeta mpp_frame_get_meta(const MppFrame frame);
void mpp_frame_set_meta(MppFrame frame, MppMeta meta);
/*
* color related parameter
*/
MppFrameColorRange mpp_frame_get_color_range(const MppFrame frame);
void mpp_frame_set_color_range(MppFrame frame, MppFrameColorRange color_range);
MppFrameColorPrimaries mpp_frame_get_color_primaries(const MppFrame frame);
void mpp_frame_set_color_primaries(MppFrame frame, MppFrameColorPrimaries color_primaries);
MppFrameColorTransferCharacteristic mpp_frame_get_color_trc(const MppFrame frame);
void mpp_frame_set_color_trc(MppFrame frame, MppFrameColorTransferCharacteristic color_trc);
MppFrameColorSpace mpp_frame_get_colorspace(const MppFrame frame);
void mpp_frame_set_colorspace(MppFrame frame, MppFrameColorSpace colorspace);
MppFrameChromaLocation mpp_frame_get_chroma_location(const MppFrame frame);
void mpp_frame_set_chroma_location(MppFrame frame, MppFrameChromaLocation chroma_location);
MppFrameFormat mpp_frame_get_fmt(MppFrame frame);
void mpp_frame_set_fmt(MppFrame frame, MppFrameFormat fmt);
MppFrameRational mpp_frame_get_sar(const MppFrame frame);
void mpp_frame_set_sar(MppFrame frame, MppFrameRational sar);
MppFrameMasteringDisplayMetadata mpp_frame_get_mastering_display(const MppFrame frame);
void mpp_frame_set_mastering_display(MppFrame frame, MppFrameMasteringDisplayMetadata mastering_display);
MppFrameContentLightMetadata mpp_frame_get_content_light(const MppFrame frame);
void mpp_frame_set_content_light(MppFrame frame, MppFrameContentLightMetadata content_light);
MppFrameHdrDynamicMeta* mpp_frame_get_hdr_dynamic_meta(const MppFrame frame);
void mpp_frame_set_hdr_dynamic_meta(MppFrame frame, MppFrameHdrDynamicMeta *vivi_data);
/*
* HDR parameter
*/
#ifdef __cplusplus
}
#endif
#endif /*__MPP_FRAME_H__*/

mpp_log.h

@@ -0,0 +1,99 @@
/*
* Copyright 2022 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_LOG_H__
#define __MPP_LOG_H__
#include "rk_type.h"
#include "mpp_log_def.h"
/*
* _c function will add condition check
* _f function will add function name to the log
* _cf function will add both function name and condition check
*/
/*
* mpp runtime log system usage:
* mpp_logf is for fatal logging. For use when aborting.
* mpp_loge is for error logging. For use with unrecoverable failures.
* mpp_logw is for warning logging. For use with recoverable failures.
* mpp_logi is for informational logging.
* mpp_logd is for debug logging.
* mpp_logv is for verbose logging.
*/
#define mpp_logf(fmt, ...) _mpp_log_l(MPP_LOG_FATAL, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_loge(fmt, ...) _mpp_log_l(MPP_LOG_ERROR, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_logw(fmt, ...) _mpp_log_l(MPP_LOG_WARN, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_logi(fmt, ...) _mpp_log_l(MPP_LOG_INFO, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_logd(fmt, ...) _mpp_log_l(MPP_LOG_DEBUG, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_logv(fmt, ...) _mpp_log_l(MPP_LOG_VERBOSE, MODULE_TAG, fmt, NULL, ## __VA_ARGS__)
#define mpp_logf_f(fmt, ...) _mpp_log_l(MPP_LOG_FATAL, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_loge_f(fmt, ...) _mpp_log_l(MPP_LOG_ERROR, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_logw_f(fmt, ...) _mpp_log_l(MPP_LOG_WARN, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_logi_f(fmt, ...) _mpp_log_l(MPP_LOG_INFO, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_logd_f(fmt, ...) _mpp_log_l(MPP_LOG_DEBUG, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_logv_f(fmt, ...) _mpp_log_l(MPP_LOG_VERBOSE, MODULE_TAG, fmt, __FUNCTION__, ## __VA_ARGS__)
#define mpp_logf_c(cond, fmt, ...) do { if (cond) mpp_logf(fmt, ## __VA_ARGS__); } while (0)
#define mpp_loge_c(cond, fmt, ...) do { if (cond) mpp_loge(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logw_c(cond, fmt, ...) do { if (cond) mpp_logw(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logi_c(cond, fmt, ...) do { if (cond) mpp_logi(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logd_c(cond, fmt, ...) do { if (cond) mpp_logd(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logv_c(cond, fmt, ...) do { if (cond) mpp_logv(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logf_cf(cond, fmt, ...) do { if (cond) mpp_logf_f(fmt, ## __VA_ARGS__); } while (0)
#define mpp_loge_cf(cond, fmt, ...) do { if (cond) mpp_loge_f(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logw_cf(cond, fmt, ...) do { if (cond) mpp_logw_f(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logi_cf(cond, fmt, ...) do { if (cond) mpp_logi_f(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logd_cf(cond, fmt, ...) do { if (cond) mpp_logd_f(fmt, ## __VA_ARGS__); } while (0)
#define mpp_logv_cf(cond, fmt, ...) do { if (cond) mpp_logv_f(fmt, ## __VA_ARGS__); } while (0)
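/*
 * Usage sketch (MODULE_TAG must be defined before including this header;
 * ret / dbg_en / state are illustrative variables):
 *
 *     #define MODULE_TAG "demo"
 *     #include "mpp_log.h"
 *
 *     mpp_loge_f("open failed ret %d\n", ret);   // error log with function name
 *     mpp_logd_c(dbg_en, "state %d\n", state);   // debug log gated on a condition
 */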
/*
* mpp runtime log system usage:
* mpp_err is for error status messages; it always prints.
* mpp_log is for important messages like open/close/reset/flush; it always prints too.
*/
#define mpp_log(fmt, ...) mpp_logi(fmt, ## __VA_ARGS__)
#define mpp_err(fmt, ...) mpp_loge(fmt, ## __VA_ARGS__)
#define mpp_log_f(fmt, ...) mpp_logi_f(fmt, ## __VA_ARGS__)
#define mpp_err_f(fmt, ...) mpp_loge_f(fmt, ## __VA_ARGS__)
#define mpp_log_c(cond, fmt, ...) do { if (cond) mpp_log(fmt, ## __VA_ARGS__); } while (0)
#define mpp_log_cf(cond, fmt, ...) do { if (cond) mpp_log_f(fmt, ## __VA_ARGS__); } while (0)
#ifdef __cplusplus
extern "C" {
#endif
void _mpp_log_l(int level, const char *tag, const char *fmt, const char *func, ...);
void mpp_set_log_level(int level);
int mpp_get_log_level(void);
/* deprecated function */
void _mpp_log(const char *tag, const char *fmt, const char *func, ...);
void _mpp_err(const char *tag, const char *fmt, const char *func, ...);
#ifdef __cplusplus
}
#endif
#endif /*__MPP_LOG_H__*/
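
A minimal usage sketch for the macros above (a hypothetical demo file; MODULE_TAG must be defined before the header is included, and the format strings are illustrative):

/* log_demo.c - hypothetical sketch of the mpp log api above */
#define MODULE_TAG "log_demo"
#include "mpp_log.h"

int main(void)
{
    int ret = -1;

    mpp_set_log_level(MPP_LOG_DEBUG);             /* raise verbosity to debug */
    mpp_log("decoder open\n");                    /* info level, always prints */
    mpp_logd_f("ret %d\n", ret);                  /* debug log prefixed with the function name */
    mpp_loge_c(ret < 0, "open failed %d\n", ret); /* conditional error log */
    return 0;
}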

View File

@ -0,0 +1,37 @@
/*
* Copyright 2022 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_LOG_DEF_H__
#define __MPP_LOG_DEF_H__
#ifdef __cplusplus
extern "C" {
#endif
#define MPP_LOG_UNKNOWN 0 /* internal use only */
#define MPP_LOG_FATAL 1 /* fatal error on aborting */
#define MPP_LOG_ERROR 2 /* error log on unrecoverable failures */
#define MPP_LOG_WARN 3 /* warning log on recoverable failures */
#define MPP_LOG_INFO 4 /* Informational log */
#define MPP_LOG_DEBUG 5 /* Debug log */
#define MPP_LOG_VERBOSE 6 /* Verbose log */
#define MPP_LOG_SILENT 7 /* internal use only */
#ifdef __cplusplus
}
#endif
#endif /*__MPP_LOG_DEF_H__*/

View File

@ -0,0 +1,179 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_META_H__
#define __MPP_META_H__
#include <stdint.h>
#include "rk_type.h"
#define FOURCC_META(a, b, c, d) (((RK_U32)(a) << 24) | \
((RK_U32)(b) << 16) | \
((RK_U32)(c) << 8) | \
((RK_U32)(d) << 0))
/*
* Mpp Metadata definition
*
 * Metadata is for information transmission in mpp.
 * A mpp task will carry two kinds of metadata:
*
* 1. Data flow metadata
* This metadata contains information of input / output data flow. For example
 * A. decoder input side task: the input packet must be defined and the output frame
 * may be left undefined; the decoder will then malloc or use a committed buffer to
 * complete decoding.
 * B. decoder output side task
*
*
* 2. Flow control metadata
*
*/
typedef enum MppMetaDataType_e {
/*
* mpp meta data of data flow
* reference counter will be used for these meta data type
*/
TYPE_FRAME = FOURCC_META('m', 'f', 'r', 'm'),
TYPE_PACKET = FOURCC_META('m', 'p', 'k', 't'),
TYPE_BUFFER = FOURCC_META('m', 'b', 'u', 'f'),
/* mpp meta data of normal data type */
TYPE_S32 = FOURCC_META('s', '3', '2', ' '),
TYPE_S64 = FOURCC_META('s', '6', '4', ' '),
TYPE_PTR = FOURCC_META('p', 't', 'r', ' '),
} MppMetaType;
typedef enum MppMetaKey_e {
/* data flow key */
KEY_INPUT_FRAME = FOURCC_META('i', 'f', 'r', 'm'),
KEY_INPUT_PACKET = FOURCC_META('i', 'p', 'k', 't'),
KEY_OUTPUT_FRAME = FOURCC_META('o', 'f', 'r', 'm'),
KEY_OUTPUT_PACKET = FOURCC_META('o', 'p', 'k', 't'),
/* output motion information for motion detection */
KEY_MOTION_INFO = FOURCC_META('m', 'v', 'i', 'f'),
KEY_HDR_INFO = FOURCC_META('h', 'd', 'r', ' '),
KEY_HDR_META_OFFSET = FOURCC_META('h', 'd', 'r', 'o'),
KEY_HDR_META_SIZE = FOURCC_META('h', 'd', 'r', 'l'),
/* flow control key */
KEY_INPUT_BLOCK = FOURCC_META('i', 'b', 'l', 'k'),
KEY_OUTPUT_BLOCK = FOURCC_META('o', 'b', 'l', 'k'),
KEY_INPUT_IDR_REQ = FOURCC_META('i', 'i', 'd', 'r'), /* input idr frame request flag */
KEY_OUTPUT_INTRA = FOURCC_META('o', 'i', 'd', 'r'), /* output intra frame indicator */
/* mpp_frame / mpp_packet meta data info key */
KEY_TEMPORAL_ID = FOURCC_META('t', 'l', 'i', 'd'),
KEY_LONG_REF_IDX = FOURCC_META('l', 't', 'i', 'd'),
KEY_ENC_AVERAGE_QP = FOURCC_META('a', 'v', 'g', 'q'),
KEY_ROI_DATA = FOURCC_META('r', 'o', 'i', ' '),
KEY_OSD_DATA = FOURCC_META('o', 's', 'd', ' '),
KEY_OSD_DATA2 = FOURCC_META('o', 's', 'd', '2'),
KEY_USER_DATA = FOURCC_META('u', 's', 'r', 'd'),
KEY_USER_DATAS = FOURCC_META('u', 'r', 'd', 's'),
/*
* For vepu580 roi buffer config mode
 * The encoder roi structure is so complex that we provide a buffer
 * tunnel for external users to configure the encoder hardware by sending
 * the roi data buffer directly.
 * This reduces the config parsing and roi buffer generation
 * overhead in mpp.
*/
KEY_ROI_DATA2 = FOURCC_META('r', 'o', 'i', '2'),
/*
* qpmap for rv1109/1126 encoder qpmap config
 * Input data is a MppBuffer which contains an array of 16-bit Vepu541RoiCfg
 * entries; each 16-bit entry carries the qp info of one 16x16 block.
*
* H.264 - 16x16 block qp is arranged in raster order:
* each value is a 16bit data
* 00 01 02 03 04 05 06 07 -> 00 01 02 03 04 05 06 07
* 10 11 12 13 14 15 16 17 10 11 12 13 14 15 16 17
* 20 21 22 23 24 25 26 27 20 21 22 23 24 25 26 27
* 30 31 32 33 34 35 36 37 30 31 32 33 34 35 36 37
*
 * H.265 - 16x16 block qp is reordered to 64x64/32x32 ctu order, then 64x64 / 32x32 ctu raster order
* 64x64 ctu
* 00 01 02 03 04 05 06 07 -> 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33 04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37
* 10 11 12 13 14 15 16 17
* 20 21 22 23 24 25 26 27
* 30 31 32 33 34 35 36 37
* 32x32 ctu
* 00 01 02 03 04 05 06 07 -> 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17
* 10 11 12 13 14 15 16 17 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37
* 20 21 22 23 24 25 26 27
* 30 31 32 33 34 35 36 37
*/
KEY_QPMAP0 = FOURCC_META('e', 'q', 'm', '0'),
/* input motion list for smart p rate control */
KEY_MV_LIST = FOURCC_META('m', 'v', 'l', 't'),
/* frame long-term reference frame operation */
KEY_ENC_MARK_LTR = FOURCC_META('m', 'l', 't', 'r'),
KEY_ENC_USE_LTR = FOURCC_META('u', 'l', 't', 'r'),
/* MLVEC specified encoder feature */
KEY_ENC_FRAME_QP = FOURCC_META('f', 'r', 'm', 'q'),
KEY_ENC_BASE_LAYER_PID = FOURCC_META('b', 'p', 'i', 'd'),
/* Thumbnail info for decoder output frame */
KEY_DEC_TBN_EN = FOURCC_META('t', 'b', 'e', 'n'),
KEY_DEC_TBN_Y_OFFSET = FOURCC_META('t', 'b', 'y', 'o'),
KEY_DEC_TBN_UV_OFFSET = FOURCC_META('t', 'b', 'c', 'o'),
} MppMetaKey;
#define mpp_meta_get(meta) mpp_meta_get_with_tag(meta, MODULE_TAG, __FUNCTION__)
#include "mpp_frame.h"
#include "mpp_packet.h"
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET mpp_meta_get_with_tag(MppMeta *meta, const char *tag, const char *caller);
MPP_RET mpp_meta_put(MppMeta meta);
RK_S32 mpp_meta_size(MppMeta meta);
MPP_RET mpp_meta_set_s32(MppMeta meta, MppMetaKey key, RK_S32 val);
MPP_RET mpp_meta_set_s64(MppMeta meta, MppMetaKey key, RK_S64 val);
MPP_RET mpp_meta_set_ptr(MppMeta meta, MppMetaKey key, void *val);
MPP_RET mpp_meta_get_s32(MppMeta meta, MppMetaKey key, RK_S32 *val);
MPP_RET mpp_meta_get_s64(MppMeta meta, MppMetaKey key, RK_S64 *val);
MPP_RET mpp_meta_get_ptr(MppMeta meta, MppMetaKey key, void **val);
MPP_RET mpp_meta_set_frame (MppMeta meta, MppMetaKey key, MppFrame frame);
MPP_RET mpp_meta_set_packet(MppMeta meta, MppMetaKey key, MppPacket packet);
MPP_RET mpp_meta_set_buffer(MppMeta meta, MppMetaKey key, MppBuffer buffer);
MPP_RET mpp_meta_get_frame (MppMeta meta, MppMetaKey key, MppFrame *frame);
MPP_RET mpp_meta_get_packet(MppMeta meta, MppMetaKey key, MppPacket *packet);
MPP_RET mpp_meta_get_buffer(MppMeta meta, MppMetaKey key, MppBuffer *buffer);
MPP_RET mpp_meta_get_s32_d(MppMeta meta, MppMetaKey key, RK_S32 *val, RK_S32 def);
MPP_RET mpp_meta_get_s64_d(MppMeta meta, MppMetaKey key, RK_S64 *val, RK_S64 def);
MPP_RET mpp_meta_get_ptr_d(MppMeta meta, MppMetaKey key, void **val, void *def);
MPP_RET mpp_meta_get_frame_d(MppMeta meta, MppMetaKey key, MppFrame *frame, MppFrame def);
MPP_RET mpp_meta_get_packet_d(MppMeta meta, MppMetaKey key, MppPacket *packet, MppPacket def);
MPP_RET mpp_meta_get_buffer_d(MppMeta meta, MppMetaKey key, MppBuffer *buffer, MppBuffer def);
#ifdef __cplusplus
}
#endif
#endif /*__MPP_META_H__*/
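
A short sketch of the accessors above (hypothetical; error handling elided and the key choice is only for illustration):

/* meta_demo.c - hypothetical sketch of the MppMeta set/get pairs */
#define MODULE_TAG "meta_demo"
#include "mpp_meta.h"

static void meta_demo(void)
{
    MppMeta meta = NULL;
    RK_S32 tid = -1;

    mpp_meta_get(&meta);                                /* acquire a tagged meta instance */
    mpp_meta_set_s32(meta, KEY_TEMPORAL_ID, 1);         /* store a 32-bit value */
    mpp_meta_get_s32_d(meta, KEY_TEMPORAL_ID, &tid, 0); /* read it back, 0 as default */
    mpp_meta_put(meta);                                 /* drop the reference */
}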

View File

@ -0,0 +1,117 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_PACKET_H__
#define __MPP_PACKET_H__
#include "mpp_meta.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
* MppPacket interface
*
* mpp_packet_init = mpp_packet_new + mpp_packet_set_data + mpp_packet_set_size
* mpp_packet_copy_init = mpp_packet_init + memcpy
*/
MPP_RET mpp_packet_new(MppPacket *packet);
MPP_RET mpp_packet_init(MppPacket *packet, void *data, size_t size);
MPP_RET mpp_packet_init_with_buffer(MppPacket *packet, MppBuffer buffer);
MPP_RET mpp_packet_copy_init(MppPacket *packet, const MppPacket src);
MPP_RET mpp_packet_deinit(MppPacket *packet);
/*
* data : ( R/W ) start address of the whole packet memory
* size : ( R/W ) total size of the whole packet memory
* pos : ( R/W ) current access position of the whole packet memory, used for buffer read/write
 * length  : ( R/W ) the remaining length from the current position to the end of the buffer
 *          NOTE: normally length is updated only by set_pos,
 *          so set_length must be used carefully and only for special usage
*/
void mpp_packet_set_data(MppPacket packet, void *data);
void mpp_packet_set_size(MppPacket packet, size_t size);
void mpp_packet_set_pos(MppPacket packet, void *pos);
void mpp_packet_set_length(MppPacket packet, size_t size);
void* mpp_packet_get_data(const MppPacket packet);
void* mpp_packet_get_pos(const MppPacket packet);
size_t mpp_packet_get_size(const MppPacket packet);
size_t mpp_packet_get_length(const MppPacket packet);
void mpp_packet_set_pts(MppPacket packet, RK_S64 pts);
RK_S64 mpp_packet_get_pts(const MppPacket packet);
void mpp_packet_set_dts(MppPacket packet, RK_S64 dts);
RK_S64 mpp_packet_get_dts(const MppPacket packet);
void mpp_packet_set_flag(MppPacket packet, RK_U32 flag);
RK_U32 mpp_packet_get_flag(const MppPacket packet);
MPP_RET mpp_packet_set_eos(MppPacket packet);
MPP_RET mpp_packet_clr_eos(MppPacket packet);
RK_U32 mpp_packet_get_eos(MppPacket packet);
MPP_RET mpp_packet_set_extra_data(MppPacket packet);
void mpp_packet_set_buffer(MppPacket packet, MppBuffer buffer);
MppBuffer mpp_packet_get_buffer(const MppPacket packet);
/*
* data access interface
*/
MPP_RET mpp_packet_read(MppPacket packet, size_t offset, void *data, size_t size);
MPP_RET mpp_packet_write(MppPacket packet, size_t offset, void *data, size_t size);
/*
* meta data access interface
*/
RK_S32 mpp_packet_has_meta(const MppPacket packet);
MppMeta mpp_packet_get_meta(const MppPacket packet);
/*
* multi packet sequence interface for slice/split encoding/decoding
 * partition - the packet is a part of a whole image
* soi - Start Of Image
* eoi - End Of Image
*/
RK_U32 mpp_packet_is_partition(const MppPacket packet);
RK_U32 mpp_packet_is_soi(const MppPacket packet);
RK_U32 mpp_packet_is_eoi(const MppPacket packet);
/*
 * packet segment pack info:
 * segment number - number of segments
 * segment info   - base address of the segment info
*/
typedef struct MppPktSeg_t MppPktSeg;
struct MppPktSeg_t {
RK_S32 index;
RK_S32 type;
RK_U32 offset;
RK_U32 len;
const MppPktSeg *next;
};
RK_U32 mpp_packet_get_segment_nb(const MppPacket packet);
const MppPktSeg *mpp_packet_get_segment_info(const MppPacket packet);
#ifdef __cplusplus
}
#endif
#endif /*__MPP_PACKET_H__*/
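
A sketch of the pos/length convention documented above (hypothetical consumer of a caller-owned buffer):

/* packet_demo.c - hypothetical sketch of MppPacket pos/length usage */
#include "mpp_packet.h"

static void packet_demo(void *stream, size_t size)
{
    MppPacket pkt = NULL;

    mpp_packet_init(&pkt, stream, size);        /* wrap an existing cpu buffer */
    /* consume 16 bytes: moving pos forward also shrinks length */
    char *pos = (char *)mpp_packet_get_pos(pkt);
    mpp_packet_set_pos(pkt, pos + 16);
    size_t left = mpp_packet_get_length(pkt);   /* now size - 16 */
    (void)left;
    mpp_packet_deinit(&pkt);
}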

View File

@ -0,0 +1,257 @@
/*
* Copyright 2016 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_RC_API_H__
#define __MPP_RC_API_H__
#include "mpp_err.h"
#include "rk_venc_rc.h"
#include "mpp_rc_defs.h"
/*
* Mpp rate control has three parts:
*
* 1. MPI user config module
* MppEncRcCfg structure is provided to user for overall rate control config
* Mpp will receive MppEncRcCfg from user, check parameter and set it to
* encoder.
*
* 2. Encoder rate control module
* Encoder will implement the rate control strategy required by users
* including CBR, VBR, AVBR and so on.
 *    This module only implements the target bit calculation and the quality
 *    restriction; the quality level is controlled by hal.
*
* 3. Hal rate control module
* Hal will implement the rate control on hardware. Hal will calculate the
* QP parameter for hardware according to the frame level target bit
 *    specified by the encoder, and then report the real bitrate and quality
 *    to the encoder.
*
* The header defines the communication interfaces and structures used between
* MPI, encoder and hal.
*/
typedef enum RcMode_e {
RC_VBR,
RC_CBR,
RC_FIXQP,
RC_AVBR,
RC_CVBR,
RC_QVBR,
RC_LEARNING,
RC_MODE_BUTT,
} RcMode;
typedef enum GopMode_e {
NORMAL_P,
SMART_P,
} GopMode;
/*
* frame rate parameters have great effect on rate control
*
* fps_in_flex
 *      0 - fixed input frame rate
* 1 - variable input frame rate
*
* fps_in_num
* input frame rate numerator, if 0 then default 30
*
* fps_in_denorm
 *      input frame rate denominator, if 0 then default 1
*
* fps_out_flex
 *      0 - fixed output frame rate
* 1 - variable output frame rate
*
* fps_out_num
* output frame rate numerator, if 0 then default 30
*
* fps_out_denorm
 *      output frame rate denominator, if 0 then default 1
*/
typedef struct RcFpsCfg_t {
RK_S32 fps_in_flex;
RK_S32 fps_in_num;
RK_S32 fps_in_denorm;
RK_S32 fps_out_flex;
RK_S32 fps_out_num;
RK_S32 fps_out_denorm;
} RcFpsCfg;
typedef struct RcSuperframeCfg_t {
MppEncRcSuperFrameMode super_mode;
RK_U32 super_i_thd;
RK_U32 super_p_thd;
MppEncRcPriority rc_priority;
} RcSuperframeCfg;
typedef struct RcDebreathCfg_t {
RK_U32 enable;
RK_U32 strength;
} RcDebreathCfg;
typedef struct RcHierQPCfg_t {
RK_S32 hier_qp_en;
RK_S32 hier_qp_delta[4];
RK_S32 hier_frame_num[4];
} RcHierQPCfg;
/*
* Control parameter from external config
*
* It will be updated on rc/prep/gopref config changed.
*/
typedef struct RcCfg_s {
/* encode image size */
RK_S32 width;
RK_S32 height;
/* Use rc_mode to find different api */
RcMode mode;
RcFpsCfg fps;
GopMode gop_mode;
/* I frame gop len */
RK_S32 igop;
/* visual gop len */
RK_S32 vgop;
/* bitrate parameter */
RK_S32 bps_min;
RK_S32 bps_target;
RK_S32 bps_max;
RK_S32 stats_time;
/* max I frame bit ratio to P frame bit */
RK_S32 max_i_bit_prop;
RK_S32 min_i_bit_prop;
RK_S32 init_ip_ratio;
/* layer bitrate proportion */
RK_S32 layer_bit_prop[4];
/* quality parameter */
RK_S32 init_quality;
RK_S32 max_quality;
RK_S32 min_quality;
RK_S32 max_i_quality;
RK_S32 min_i_quality;
RK_S32 i_quality_delta;
RK_S32 vi_quality_delta;
/* layer quality proportion */
RK_S32 layer_quality_delta[4];
/* reencode parameter */
RK_S32 max_reencode_times;
    /* still / motion decision parameter */
RK_S32 min_still_prop;
RK_S32 max_still_quality;
/*
* vbr parameter
*
 * vbr_hi_prop  - high proportion bitrate for reducing quality
 * vbr_lo_prop  - low  proportion bitrate for increasing quality
*/
RK_S32 vbr_hi_prop;
RK_S32 vbr_lo_prop;
MppEncRcDropFrmMode drop_mode;
RK_U32 drop_thd;
RK_U32 drop_gap;
RcSuperframeCfg super_cfg;
RcDebreathCfg debreath_cfg;
RcHierQPCfg hier_qp_cfg;
RK_U32 refresh_len;
} RcCfg;
/*
* Different rate control strategy will be implemented by different API config
*/
typedef struct RcImplApi_t {
char *name;
MppCodingType type;
RK_U32 ctx_size;
MPP_RET (*init)(void *ctx, RcCfg *cfg);
MPP_RET (*deinit)(void *ctx);
MPP_RET (*check_drop)(void *ctx, EncRcTask *task);
MPP_RET (*check_reenc)(void *ctx, EncRcTask *task);
/*
* frm_start - frame level rate control frm_start.
* The EncRcTaskInfo will be output to hal for hardware to implement.
* frm_end - frame level rate control frm_end.
* The EncRcTaskInfo is returned for real quality and bitrate.
*/
MPP_RET (*frm_start)(void *ctx, EncRcTask *task);
MPP_RET (*frm_end)(void *ctx, EncRcTask *task);
/*
* hal_start - hardware level rate control start.
* The EncRcTaskInfo will be output to hal for hardware to implement.
* hal_end - hardware level rate control end.
* The EncRcTaskInfo is returned for real quality and bitrate.
*/
MPP_RET (*hal_start)(void *ctx, EncRcTask *task);
MPP_RET (*hal_end)(void *ctx, EncRcTask *task);
} RcImplApi;
/*
* structures for RC API register and query
*/
typedef struct RcApiBrief_t {
const char *name;
MppCodingType type;
} RcApiBrief;
typedef struct RcApiQueryAll_t {
/* input param for query */
RcApiBrief *brief;
RK_S32 max_count;
/* output query count */
RK_S32 count;
} RcApiQueryAll;
typedef struct RcApiQueryType_t {
/* input param for query */
RcApiBrief *brief;
RK_S32 max_count;
MppCodingType type;
/* output query count */
RK_S32 count;
} RcApiQueryType;
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET rc_api_add(const RcImplApi *api);
MPP_RET rc_brief_get_all(RcApiQueryAll *query);
MPP_RET rc_brief_get_by_type(RcApiQueryType *query);
#ifdef __cplusplus
}
#endif
#endif /* __MPP_RC_API_H__ */
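
A sketch of registering a custom strategy through rc_api_add() (hypothetical; the callbacks are placeholders and assume the fps fields were validated elsewhere):

/* rc_demo.c - hypothetical custom rate control strategy registration */
#include "mpp_rc_api.h"

typedef struct DemoRcCtx_t {
    RcCfg cfg;
} DemoRcCtx;

static MPP_RET demo_init(void *ctx, RcCfg *cfg)
{
    ((DemoRcCtx *)ctx)->cfg = *cfg;     /* keep a copy of the user config */
    return MPP_OK;
}

static MPP_RET demo_frm_start(void *ctx, EncRcTask *task)
{
    DemoRcCtx *p = (DemoRcCtx *)ctx;

    /* naive target: average bits per frame from the target bitrate */
    task->info.bit_target = p->cfg.bps_target * p->cfg.fps.fps_out_denorm /
                            p->cfg.fps.fps_out_num;
    return MPP_OK;
}

static const RcImplApi demo_api = {
    .name      = (char *)"demo",
    .type      = MPP_VIDEO_CodingAVC,
    .ctx_size  = sizeof(DemoRcCtx),
    .init      = demo_init,
    .frm_start = demo_frm_start,
    /* the remaining callbacks stay NULL for brevity */
};

void demo_rc_register(void) { rc_api_add(&demo_api); }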

View File

@ -0,0 +1,212 @@
/*
* Copyright 2016 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_RC_DEFS_H__
#define __MPP_RC_DEFS_H__
#include "rk_venc_ref.h"
#define MAX_CPB_REFS (8)
typedef enum EncFrmType_e {
INTER_P_FRAME = 0,
INTER_B_FRAME = 1,
INTRA_FRAME = 2,
INTER_VI_FRAME = 3,
INTRA_RFH_FRAME = 4,
} EncFrmType;
/*
 * EncFrmStatus records the encoding frame status and also controls the
 * work flow of the encoder. It is the communication channel between the
 * encoder implementation module, the rate control module and the hardware module.
*
* bit 0 ~ 31 frame status
* 0 ~ 15 current frame status
* 16 ~ 31 reference frame status
* bit 32 ~ 63 encoding flow control
*/
typedef union EncFrmStatus_u {
struct {
/*
* bit 0 ~ 31 frame status
*/
/* status flag */
RK_U32 valid : 1;
/*
* 0 - write the reconstructed frame pixel to memory
* 1 - do not write the reconstructed frame pixel to memory
*/
RK_U32 non_recn : 1;
/*
* 0 - normal frame and normal dpb management
* 1 - save recon frame as first pass extra frame. Used in two pass mode
*/
RK_U32 save_pass1 : 1;
/*
* 0 - use normal input source frame as input
* 1 - use the previously stored first pass recon frame as input frame
*/
RK_U32 use_pass1 : 1;
/* reference status flag */
/*
* 0 - inter frame
* 1 - intra frame
*/
RK_U32 is_intra : 1;
/*
* Valid when is_intra is true
* 0 - normal intra frame
* 1 - IDR frame
*/
RK_U32 is_idr : 1;
/*
* 0 - mark as reference frame
 * 1 - mark as non-reference frame
*/
RK_U32 is_non_ref : 1;
/*
* Valid when is_non_ref is false
* 0 - mark as short-term reference frame
 * 1 - mark as long-term reference frame
*/
RK_U32 is_lt_ref : 1;
/* bit 8 - 15 */
RK_U32 lt_idx : 4;
RK_U32 temporal_id : 4;
/* distance between current frame and reference frame */
MppEncRefMode ref_mode : 6;
RK_S32 ref_arg : 8;
RK_S32 ref_dist : 2;
/*
* bit 32 ~ 63 encoder flow control flags
*/
/*
* 0 - normal frame encoding
* 1 - current frame will be dropped
*/
RK_U32 drop : 1;
/*
* 0 - rate control module does not change frame type parameter
 * 1 - rate control module changes the frame type parameter and a reencode is
 *     needed to redo the dpb process. This also means the dpb module will follow
 *     the frame status parameter provided by the rate control module.
*/
RK_U32 re_dpb_proc : 1;
/*
* 0 - current frame encoding is in normal flow
* 1 - current frame encoding is in reencode flow
*/
RK_U32 reencode : 1;
/*
 * When true, the current frame size is super large and the frame should be reencoded.
*/
RK_U32 super_frame : 1;
/*
 * When true, the current frame is forced to be encoded as a software skip frame
*/
RK_U32 force_pskip : 1;
/*
* Current frame is intra refresh frame
*/
RK_U32 is_i_refresh : 1;
/*
* Current frame needs add recovery point prefix
*/
RK_U32 is_i_recovery : 1;
RK_U32 reserved1 : 1;
/* reencode times */
RK_U32 reencode_times : 8;
/* sequential index for each frame */
RK_U32 seq_idx : 16;
};
RK_U64 val;
} EncFrmStatus;
typedef struct EncCpbStatus_t {
RK_S32 seq_idx;
EncFrmStatus curr;
EncFrmStatus refr;
/* initial cpb status for current frame encoding */
EncFrmStatus init[MAX_CPB_REFS];
/* final cpb status after current frame encoding */
EncFrmStatus final[MAX_CPB_REFS];
} EncCpbStatus;
#define ENC_RC_FORCE_QP (0x00000001)
typedef struct EncRcForceCfg_t {
RK_U32 force_flag;
RK_S32 force_qp;
RK_U32 reserve[6];
} EncRcForceCfg;
/*
* communication channel between rc / hal / hardware
*
* rc -> hal bit_target / bit_max / bit_min
* hal -> hw quality_target / quality_max / quality_min
* hw -> rc / hal bit_real / quality_real / madi / madp
*/
typedef struct EncRcCommonInfo_t {
EncFrmType frame_type;
/* rc to hal */
RK_S32 bit_target;
RK_S32 bit_max;
RK_S32 bit_min;
RK_S32 quality_target;
RK_S32 quality_max;
RK_S32 quality_min;
/* rc from hardware */
RK_S32 bit_real;
RK_S32 quality_real;
RK_S32 madi;
RK_S32 madp;
RK_U32 iblk4_prop; // scale 256
RK_S32 reserve[15];
} EncRcTaskInfo;
typedef struct EncRcTask_s {
EncCpbStatus cpb;
EncFrmStatus frm;
EncRcTaskInfo info;
EncRcForceCfg force;
MppFrame frame;
} EncRcTask;
#endif /* __MPP_RC_DEFS_H__ */
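
A sketch of filling and testing the bitfields above (hypothetical values):

/* frm_demo.c - hypothetical use of the EncFrmStatus union */
#include "mpp_rc_defs.h"

static void frm_demo(void)
{
    EncFrmStatus frm;

    frm.val = 0;            /* clear all 64 bits through the union */
    frm.valid = 1;
    frm.is_intra = 1;       /* intra frame ...    */
    frm.is_idr = 1;         /* ... encoded as IDR */
    frm.temporal_id = 0;

    if (frm.is_intra && !frm.is_non_ref) {
        /* reference intra frame: a dpb refresh would happen here */
    }
}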

View File

@ -0,0 +1,237 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __MPP_TASK_H__
#define __MPP_TASK_H__
#include "mpp_meta.h"
/*
* Advanced task flow
* Advanced task flow introduces three concepts: port, task and item
*
* Port is from OpenMAX
 * Port has two types: input port and output port, which are both for data transaction.
 * A port works like a queue: tasks are dequeued from or enqueued to one port.
 * On the input side the user dequeues a task from the input port, sets up the task
 * and enqueues it back to the input port.
 * On the output side the user dequeues a task from the output port, gets the
 * information from it and then enqueues the task back to the output port.
*
 * A task indicates one transaction on the port.
 * A task has two working modes: async mode and sync mode.
 * If mpp works in sync mode, the task has been done when the enqueue function returns.
 * If mpp works in async mode, the task has only been put on the task queue for
 * processing when the enqueue function returns.
 * A task can carry different items; it is just like a container of items.
*
* Item indicates MppPacket or MppFrame which is contained in one task
*/
/*
* One mpp task queue has two ports: input and output
*
* The whole picture is:
* Top layer mpp has two ports: mpp_input_port and mpp_output_port
 * But internally these two ports belong to two task queues.
* The mpp_input_port is the mpp_input_task_queue's input port.
* The mpp_output_port is the mpp_output_task_queue's output port.
*
 * Each port uses its task queue for communication
*/
typedef enum {
MPP_PORT_INPUT,
MPP_PORT_OUTPUT,
MPP_PORT_BUTT,
} MppPortType;
/*
* Advance task work flow mode:
******************************************************************************
* 1. async mode (default_val)
*
* mpp_init(type, coding, MPP_WORK_ASYNC)
*
* input thread
* a - poll(input)
* b - dequeue(input, *task)
* c - task_set_item(packet/frame)
* d - enqueue(input, task) // when enqueue return the task is not done yet
*
* output thread
* a - poll(output)
* b - dequeue(output, *task)
* c - task_get_item(frame/packet)
* d - enqueue(output, task)
******************************************************************************
* 2. sync mode
*
* mpp_init(type, coding, MPP_WORK_SYNC)
*
* a - poll(input)
* b - dequeue(input, *task)
* c - task_set_item(packet/frame)
* d - enqueue(task) // when enqueue return the task is finished
******************************************************************************
*/
typedef enum {
MPP_TASK_ASYNC,
MPP_TASK_SYNC,
MPP_TASK_WORK_MODE_BUTT,
} MppTaskWorkMode;
/*
* Mpp port poll type
*
* MPP_POLL_BLOCK - for block poll
* MPP_POLL_NON_BLOCK - for non-block poll
 * smaller than MPP_POLL_MAX - for poll with timeout in ms
 * values smaller than MPP_POLL_BUTT or larger than MPP_POLL_MAX are invalid
*/
typedef enum {
MPP_POLL_BUTT = -2,
MPP_POLL_BLOCK = -1,
MPP_POLL_NON_BLOCK = 0,
MPP_POLL_MAX = 8000,
} MppPollType;
/*
* Mpp timeout define
* MPP_TIMEOUT_BLOCK - for block poll
* MPP_TIMEOUT_NON_BLOCK - for non-block poll
 * smaller than MPP_TIMEOUT_MAX - for poll with timeout in ms
 * values smaller than MPP_TIMEOUT_BUTT or larger than MPP_TIMEOUT_MAX are invalid
*/
#define MPP_TIMEOUT_BUTT (-2L)
#define MPP_TIMEOUT_BLOCK (-1L)
#define MPP_TIMEOUT_NON_BLOCK (0L)
#define MPP_TIMEOUT_MAX (8000L)
/*
* MppTask is descriptor of a task which send to mpp for process
* mpp can support different type of work mode, for example:
*
* decoder:
*
* 1. typical decoder mode:
* input - MppPacket (normal cpu buffer, need cpu copy)
* output - MppFrame (ion/drm buffer in external/internal mode)
* 2. secure decoder mode:
 *    input  - MppPacket (external ion/drm buffer, cpu can not access)
* output - MppFrame (ion/drm buffer in external/internal mode, cpu can not access)
*
* interface usage:
*
* typical flow
* input side:
* task_dequeue(ctx, PORT_INPUT, &task);
* task_put_item(task, MODE_INPUT, packet)
* task_enqueue(ctx, PORT_INPUT, task);
* output side:
* task_dequeue(ctx, PORT_OUTPUT, &task);
* task_get_item(task, MODE_OUTPUT, &frame)
* task_enqueue(ctx, PORT_OUTPUT, task);
*
* secure flow
* input side:
* task_dequeue(ctx, PORT_INPUT, &task);
* task_put_item(task, MODE_INPUT, packet)
* task_put_item(task, MODE_OUTPUT, frame) // buffer will be specified here
* task_enqueue(ctx, PORT_INPUT, task);
* output side:
* task_dequeue(ctx, PORT_OUTPUT, &task);
* task_get_item(task, MODE_OUTPUT, &frame)
* task_enqueue(ctx, PORT_OUTPUT, task);
*
* encoder:
*
* 1. typical encoder mode:
* input - MppFrame (ion/drm buffer in external mode)
* output - MppPacket (normal cpu buffer, need cpu copy)
* 2. user input encoder mode:
* input - MppFrame (normal cpu buffer, need to build hardware table for this buffer)
* output - MppPacket (normal cpu buffer, need cpu copy)
* 3. secure encoder mode:
* input - MppFrame (ion/drm buffer in external mode, cpu can not access)
 *    output - MppPacket (external ion/drm buffer, cpu can not access)
*
* typical / user input flow
* input side:
* task_dequeue(ctx, PORT_INPUT, &task);
* task_put_item(task, MODE_INPUT, frame)
* task_enqueue(ctx, PORT_INPUT, task);
* output side:
* task_dequeue(ctx, PORT_OUTPUT, &task);
* task_get_item(task, MODE_OUTPUT, &packet)
* task_enqueue(ctx, PORT_OUTPUT, task);
*
* secure flow
* input side:
* task_dequeue(ctx, PORT_INPUT, &task);
* task_put_item(task, MODE_OUTPUT, packet) // buffer will be specified here
* task_put_item(task, MODE_INPUT, frame)
* task_enqueue(ctx, PORT_INPUT, task);
* output side:
* task_dequeue(ctx, PORT_OUTPUT, &task);
* task_get_item(task, MODE_OUTPUT, &packet)
* task_get_item(task, MODE_OUTPUT, &frame)
* task_enqueue(ctx, PORT_OUTPUT, task);
*
 * NOTE: this flow can specify the output frame. The user sets up both the input frame and the output
 * packet buffer at the input side. Then at the output side, when the user gets a finished task, both the
 * output packet and the corresponding released input frame can be retrieved.
*
* image processing
*
* 1. typical image process mode:
* input - MppFrame (ion/drm buffer in external mode)
* output - MppFrame (ion/drm buffer in external mode)
*
* typical / user input flow
* input side:
* task_dequeue(ctx, PORT_INPUT, &task);
* task_put_item(task, MODE_INPUT, frame)
* task_enqueue(ctx, PORT_INPUT, task);
* output side:
* task_dequeue(ctx, PORT_OUTPUT, &task);
* task_get_item(task, MODE_OUTPUT, &frame)
* task_enqueue(ctx, PORT_OUTPUT, task);
*/
/* NOTE: use index rather than handle to describe task */
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET mpp_task_meta_set_s32(MppTask task, MppMetaKey key, RK_S32 val);
MPP_RET mpp_task_meta_set_s64(MppTask task, MppMetaKey key, RK_S64 val);
MPP_RET mpp_task_meta_set_ptr(MppTask task, MppMetaKey key, void *val);
MPP_RET mpp_task_meta_set_frame (MppTask task, MppMetaKey key, MppFrame frame);
MPP_RET mpp_task_meta_set_packet(MppTask task, MppMetaKey key, MppPacket packet);
MPP_RET mpp_task_meta_set_buffer(MppTask task, MppMetaKey key, MppBuffer buffer);
MPP_RET mpp_task_meta_get_s32(MppTask task, MppMetaKey key, RK_S32 *val, RK_S32 default_val);
MPP_RET mpp_task_meta_get_s64(MppTask task, MppMetaKey key, RK_S64 *val, RK_S64 default_val);
MPP_RET mpp_task_meta_get_ptr(MppTask task, MppMetaKey key, void **val, void *default_val);
MPP_RET mpp_task_meta_get_frame (MppTask task, MppMetaKey key, MppFrame *frame);
MPP_RET mpp_task_meta_get_packet(MppTask task, MppMetaKey key, MppPacket *packet);
MPP_RET mpp_task_meta_get_buffer(MppTask task, MppMetaKey key, MppBuffer *buffer);
#ifdef __cplusplus
}
#endif
#endif /*__MPP_QUEUE_H__*/
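
A sketch of the typical decoder input-side flow described above (hypothetical; ctx and mpi come from mpp_create()/mpp_init(), error handling elided):

/* task_demo.c - hypothetical advanced-task input flow for a decoder */
#include "rk_mpi.h"

static MPP_RET feed_packet(MppCtx ctx, MppApi *mpi, MppPacket packet)
{
    MppTask task = NULL;

    mpi->poll(ctx, MPP_PORT_INPUT, MPP_POLL_BLOCK);   /* wait for a free input task */
    mpi->dequeue(ctx, MPP_PORT_INPUT, &task);         /* take it from the input port */
    mpp_task_meta_set_packet(task, KEY_INPUT_PACKET, packet);
    return mpi->enqueue(ctx, MPP_PORT_INPUT, task);   /* hand it back to mpp */
}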

View File

@ -0,0 +1,118 @@
/*
* Copyright 2022 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_HDR_META_COM_H__
#define __RK_HDR_META_COM_H__
#include "rk_type.h"
typedef enum HdrCodecType_e {
HDR_AVS2 = 0,
HDR_HEVC = 1,
HDR_H264 = 2,
HDR_AV1 = 3,
HDR_CODEC_BUT,
} HdrCodecType;
typedef enum HdrFormat_e {
HDR_NONE = 0,
HDR10 = 1,
HLG = 2,
// RESERVED3 = 3, //reserved for more future static hdr format
// RESERVED4 = 4, //reserved for more future static hdr format
HDRVIVID = 5,
// RESERVED6 = 6, //reserved for hdr vivid
// RESERVED7 = 7, //reserved for hdr vivid
HDR10PLUS = 8,
// RESERVED9 = 9, //reserved for hdr10+
// RESERVED10 = 10,//reserved for hdr10+
DOLBY = 11,
// RESERVED12 = 12, //reserved for other dynamic hdr format
// RESERVED13 = 13, //reserved for other dynamic hdr format
HDR_FORMAT_MAX,
} HdrFormat;
typedef enum HdrPayloadFormat_e {
STATIC = 0,
DYNAMIC = 1,
HDR_PAYLOAD_FORMAT_MAX,
} HdrPayloadFormat;
typedef struct HdrStaticMeta_t {
RK_U32 color_space;
RK_U32 color_primaries;
RK_U32 color_trc;
RK_U32 red_x;
RK_U32 red_y;
RK_U32 green_x;
RK_U32 green_y;
RK_U32 blue_x;
RK_U32 blue_y;
RK_U32 white_point_x;
RK_U32 white_point_y;
RK_U32 min_luminance;
RK_U32 max_luminance;
RK_U32 max_cll;
RK_U32 max_fall;
RK_U32 reserved[4];
} HdrStaticMeta;
/*
* HDR metadata format from codec
*
* +----------+
* | header1 |
* +----------+
* | |
* | payload |
* | |
* +----------+
* | header2 |
* +----------+
* | |
* | payload |
* | |
* +----------+
* | header3 |
* +----------+
* | |
* | payload |
* | |
* +----------+
*/
typedef struct RkMetaHdrHeader_t {
/* For transmission */
RK_U16 magic; /* magic word for checking overwrite error */
RK_U16 size; /* total header+payload length including header */
RK_U16 message_total; /* total message count in current transmission */
RK_U16 message_index; /* current message index in the transmission */
/* For payload identification */
RK_U16 version; /* payload structure version */
RK_U16 hdr_format; /* HDR protocol: HDR10, HLG, Dolby, HDRVivid ... */
RK_U16 hdr_payload_type; /* HDR data type: static data, dynamic data ... */
RK_U16 video_format; /* video format: H.264, H.265, AVS2 ... */
    /* For extension usage */
RK_U32 reserve[4];
/* payload data aligned to 32bits */
RK_U32 payload[];
} RkMetaHdrHeader;
void fill_hdr_meta_to_frame(MppFrame frame, HdrCodecType codec_type);
#endif
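
A sketch of walking the chained header + payload layout drawn above (hypothetical; bounds checks kept minimal):

/* hdr_walk_demo.c - hypothetical walk over chained RkMetaHdrHeader messages */
#include <stdio.h>
#include "rk_hdr_meta_com.h"

static void walk_hdr_meta(const RK_U8 *buf, RK_U32 total)
{
    RK_U32 offset = 0;

    while (offset + sizeof(RkMetaHdrHeader) <= total) {
        const RkMetaHdrHeader *hdr = (const RkMetaHdrHeader *)(buf + offset);

        printf("msg %u/%u format %u payload %u\n",
               (unsigned)hdr->message_index, (unsigned)hdr->message_total,
               (unsigned)hdr->hdr_format, (unsigned)hdr->hdr_payload_type);
        if (hdr->size < sizeof(RkMetaHdrHeader))
            break;           /* malformed size: avoid an endless loop */
        offset += hdr->size; /* size covers header plus payload */
    }
}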

View File

@ -0,0 +1,274 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_MPI_H__
#define __RK_MPI_H__
/**
* @addtogroup rk_mpi
* @brief Rockchip Media Process Interface
* @details Media Process Platform(MPP) provides application programming
* interface for the application layer, by which applications can
* call hardware encode and decode. Current MPP fully supports
 *          chipset RK3288/RK3228/RK3229/RK3399/RK3328/RV1108. Old chipsets
 *          like RK29xx/RK30xx/RK31XX/RK3368 are partly supported due to the
 *          lack of some hardware register generation modules.
*/
#include "rk_mpi_cmd.h"
#include "mpp_task.h"
/**
* @ingroup rk_mpi
* @brief MPP main work function set
 * @details all api functions are separated into two sets: the data io api set
 *          and the control api set
*
* (1). the data api set is for data input/output flow including:
*
* (1.1) simple data api set:
*
* decode : both send video stream packet to decoder and get video frame from
* decoder at the same time.
*
* encode : both send video frame to encoder and get encoded video stream from
* encoder at the same time.
*
* decode_put_packet: send video stream packet to decoder only, async interface
*
* decode_get_frame : get video frame from decoder only, async interface
*
* encode_put_frame : send video frame to encoder only, async interface
*
* encode_get_packet: get encoded video packet from encoder only, async interface
*
* (1.2) advanced task api set:
*
* poll : poll port for dequeue
*
* dequeue : pop a task from mpp task queue
*
* enqueue : push a task to mpp task queue
*
* (2). the control api set is for mpp context control including:
*
 * control : similar to ioctl in a kernel driver, set up or get mpp internal parameters
*
* reset : clear all data in mpp context, discard all packet and frame,
* reset all components to initialized status
*/
typedef struct MppApi_t {
/**
* @brief size of struct MppApi
*/
RK_U32 size;
/**
* @brief mpp api version, generated by Git
*/
RK_U32 version;
// simple data flow interface
/**
* @brief both send video stream packet to decoder and get video frame from
* decoder at the same time
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] packet The input video stream, its usage can refer mpp_packet.h.
* @param[out] frame The output picture, its usage can refer mpp_frame.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*decode)(MppCtx ctx, MppPacket packet, MppFrame *frame);
/**
* @brief send video stream packet to decoder only, async interface
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] packet The input video stream, its usage can refer mpp_packet.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*decode_put_packet)(MppCtx ctx, MppPacket packet);
/**
* @brief get video frame from decoder only, async interface
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[out] frame The output picture, its usage can refer mpp_frame.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*decode_get_frame)(MppCtx ctx, MppFrame *frame);
/**
* @brief both send video frame to encoder and get encoded video stream from
* encoder at the same time
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] frame The input video data, its usage can refer mpp_frame.h.
* @param[out] packet The output compressed data, its usage can refer mpp_packet.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*encode)(MppCtx ctx, MppFrame frame, MppPacket *packet);
/**
* @brief send video frame to encoder only, async interface
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] frame The input video data, its usage can refer mpp_frame.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*encode_put_frame)(MppCtx ctx, MppFrame frame);
/**
* @brief get encoded video packet from encoder only, async interface
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[out] packet The output compressed data, its usage can refer mpp_packet.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*encode_get_packet)(MppCtx ctx, MppPacket *packet);
/**
* @brief ISP interface, will be supported in the future.
*/
MPP_RET (*isp)(MppCtx ctx, MppFrame dst, MppFrame src);
/**
* @brief ISP interface, will be supported in the future.
*/
MPP_RET (*isp_put_frame)(MppCtx ctx, MppFrame frame);
/**
* @brief ISP interface, will be supported in the future.
*/
MPP_RET (*isp_get_frame)(MppCtx ctx, MppFrame *frame);
// advance data flow interface
/**
* @brief poll port for dequeue
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] type input port or output port which are both for data transaction
* @param[in] timeout mpp poll type, its usage can refer mpp_task.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*poll)(MppCtx ctx, MppPortType type, MppPollType timeout);
/**
* @brief dequeue MppTask, pop a task from mpp task queue
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] type input port or output port which are both for data transaction
* @param[out] task MppTask popped from mpp task queue, its usage can refer mpp_task.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*dequeue)(MppCtx ctx, MppPortType type, MppTask *task);
/**
* @brief enqueue MppTask, push a task to mpp task queue
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] type input port or output port which are both for data transaction
* @param[in] task MppTask which is sent to mpp for process, its usage can refer mpp_task.h.
* @return 0 and positive for success, negative for failure. The return
* value is an error code. For details, please refer mpp_err.h.
*/
MPP_RET (*enqueue)(MppCtx ctx, MppPortType type, MppTask task);
// control interface
/**
* @brief discard all packet and frame, reset all component,
* for both decoder and encoder
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @return 0 for success, others for failure. The return value is an
* error code. For details, please refer mpp_err.h.
*/
MPP_RET (*reset)(MppCtx ctx);
/**
* @brief control function for mpp property setting
* @param[in] ctx The context of mpp, created by mpp_create() and initiated
* by mpp_init().
* @param[in] cmd The mpi command, its definition can refer rk_mpi_cmd.h.
* @param[in,out] param The mpi command parameter
* @return 0 for success, others for failure. The return value is an
* error code. For details, please refer mpp_err.h.
*/
MPP_RET (*control)(MppCtx ctx, MpiCmd cmd, MppParam param);
/**
* @brief The reserved segment, may be used in the future
*/
RK_U32 reserv[16];
} MppApi;
#ifdef __cplusplus
extern "C" {
#endif
/**
* @ingroup rk_mpi
* @brief Create empty context structure and mpi function pointers.
* Use functions in MppApi to access mpp services.
* @param[in,out] ctx pointer of the mpp context, refer to MpiImpl_t.
* @param[in,out] mpi pointer of mpi function, refer to MppApi.
* @return 0 for success, others for failure. The return value is an
* error code. For details, please refer mpp_err.h.
* @note This interface creates base flow context, all function calls
* are based on it.
*/
MPP_RET mpp_create(MppCtx *ctx, MppApi **mpi);
/**
* @ingroup rk_mpi
* @brief Call after mpp_create to setup mpp type and video format.
* This function will call internal context init function.
* @param[in] ctx The context of mpp, created by mpp_create().
* @param[in] type specify decoder or encoder, refer to MppCtxType.
* @param[in] coding specify video compression coding, refer to MppCodingType.
* @return 0 for success, others for failure. The return value is an
* error code. For details, please refer mpp_err.h.
*/
MPP_RET mpp_init(MppCtx ctx, MppCtxType type, MppCodingType coding);
/**
* @ingroup rk_mpi
* @brief Destroy mpp context and free both context and mpi structure,
* it matches with mpp_init().
* @param[in] ctx The context of mpp, created by mpp_create().
* @return 0 for success, others for failure. The return value is an
* error code. For details, please refer mpp_err.h.
*/
MPP_RET mpp_destroy(MppCtx ctx);
/**
* @ingroup rk_mpi
 * @brief Judge whether the given format is supported by MPP.
* @param[in] type specify decoder or encoder, refer to MppCtxType.
* @param[in] coding specify video compression coding, refer to MppCodingType.
* @return 0 for support, -1 for unsupported.
*/
MPP_RET mpp_check_support_format(MppCtxType type, MppCodingType coding);
/**
* @ingroup rk_mpi
* @brief List all formats supported by MPP
* @param NULL no need to input parameter
* @return No return value. This function just prints format information supported
* by MPP on standard output.
*/
void mpp_show_support_format(void);
void mpp_show_color_format(void);
#ifdef __cplusplus
}
#endif
#endif /*__RK_MPI_H__*/
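
A minimal bring-up sketch with the simple data api set above (hypothetical; stream feeding, frame release and looping are elided):

/* mpi_demo.c - hypothetical decoder setup and teardown */
#include "rk_mpi.h"

static MPP_RET decode_once(MppPacket packet)
{
    MppCtx ctx = NULL;
    MppApi *mpi = NULL;
    MppFrame frame = NULL;
    MPP_RET ret;

    ret = mpp_create(&ctx, &mpi);       /* context plus function table */
    if (ret)
        return ret;
    ret = mpp_init(ctx, MPP_CTX_DEC, MPP_VIDEO_CodingAVC);  /* H.264 decoder */
    if (!ret) {
        mpi->decode_put_packet(ctx, packet);  /* async input side */
        mpi->decode_get_frame(ctx, &frame);   /* async output side, may return no frame */
    }
    mpp_destroy(ctx);
    return ret;
}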

View File

@ -0,0 +1,209 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_MPI_CMD_H__
#define __RK_MPI_CMD_H__
/*
* Command id bit usage is defined as follows:
* bit 20 - 23 - module id
 * bit 16 - 19 - context id
* bit 0 - 15 - command id
*/
#define CMD_MODULE_ID_MASK (0x00F00000)
#define CMD_MODULE_OSAL (0x00100000)
#define CMD_MODULE_MPP (0x00200000)
#define CMD_MODULE_CODEC (0x00300000)
#define CMD_MODULE_HAL (0x00400000)
#define CMD_CTX_ID_MASK (0x000F0000)
#define CMD_CTX_ID_DEC (0x00010000)
#define CMD_CTX_ID_ENC (0x00020000)
#define CMD_CTX_ID_ISP (0x00030000)
/* separate encoder / decoder control command to different segment */
#define CMD_CFG_ID_MASK (0x0000FF00)
/* mpp status control command */
#define CMD_STATE_OPS (0x00000100)
/* decoder control command */
#define CMD_DEC_CFG_ALL (0x00000000)
#define CMD_DEC_QUERY (0x00000100)
#define CMD_DEC_CFG (0x00000200)
/* encoder control command */
#define CMD_ENC_CFG_ALL (0x00000000)
#define CMD_ENC_QUERY (0x00000100)
#define CMD_ENC_CFG_RC_API (0x00000200)
#define CMD_ENC_CFG_MISC (0x00008000)
#define CMD_ENC_CFG_SPLIT (0x00008100)
#define CMD_ENC_CFG_REF (0x00008200)
#define CMD_ENC_CFG_ROI (0x00008300)
#define CMD_ENC_CFG_OSD (0x00008400)
typedef enum {
MPP_OSAL_CMD_BASE = CMD_MODULE_OSAL,
MPP_OSAL_CMD_END,
MPP_CMD_BASE = CMD_MODULE_MPP,
MPP_ENABLE_DEINTERLACE,
MPP_SET_INPUT_BLOCK, /* deprecated */
MPP_SET_INTPUT_BLOCK_TIMEOUT, /* deprecated */
MPP_SET_OUTPUT_BLOCK, /* deprecated */
MPP_SET_OUTPUT_BLOCK_TIMEOUT, /* deprecated */
/*
* timeout setup, refer to MPP_TIMEOUT_XXX
* zero - non block
* negative - block with no timeout
 * positive  - timeout in millisecond
*/
MPP_SET_INPUT_TIMEOUT, /* parameter type RK_S64 */
MPP_SET_OUTPUT_TIMEOUT, /* parameter type RK_S64 */
MPP_SET_DISABLE_THREAD, /* MPP no thread mode and use external thread to decode */
MPP_STATE_CMD_BASE = CMD_MODULE_MPP | CMD_STATE_OPS,
MPP_START,
MPP_STOP,
MPP_PAUSE,
MPP_RESUME,
MPP_CMD_END,
MPP_CODEC_CMD_BASE = CMD_MODULE_CODEC,
MPP_CODEC_GET_FRAME_INFO,
MPP_CODEC_CMD_END,
MPP_DEC_CMD_BASE = CMD_MODULE_CODEC | CMD_CTX_ID_DEC,
MPP_DEC_SET_FRAME_INFO, /* vpu api legacy control for buffer slot dimension init */
MPP_DEC_SET_EXT_BUF_GROUP, /* IMPORTANT: set external buffer group to mpp decoder */
MPP_DEC_SET_INFO_CHANGE_READY,
MPP_DEC_SET_PRESENT_TIME_ORDER, /* use input time order for output */
MPP_DEC_SET_PARSER_SPLIT_MODE, /* Need to setup before init */
MPP_DEC_SET_PARSER_FAST_MODE, /* Need to setup before init */
MPP_DEC_GET_STREAM_COUNT,
MPP_DEC_GET_VPUMEM_USED_COUNT,
MPP_DEC_SET_VC1_EXTRA_DATA,
MPP_DEC_SET_OUTPUT_FORMAT,
MPP_DEC_SET_DISABLE_ERROR, /* When set it will disable sw/hw error (H.264 / H.265) */
MPP_DEC_SET_IMMEDIATE_OUT,
MPP_DEC_SET_ENABLE_DEINTERLACE, /* MPP enable deinterlace by default. Vpuapi can disable it */
MPP_DEC_SET_ENABLE_FAST_PLAY, /* enable idr output immediately */
MPP_DEC_SET_DISABLE_THREAD, /* MPP no thread mode and use external thread to decode */
MPP_DEC_SET_MAX_USE_BUFFER_SIZE,
MPP_DEC_SET_ENABLE_MVC, /* enable MVC decoding*/
MPP_DEC_CMD_QUERY = CMD_MODULE_CODEC | CMD_CTX_ID_DEC | CMD_DEC_QUERY,
/* query decoder runtime information for decode stage */
MPP_DEC_QUERY, /* set and get MppDecQueryCfg structure */
CMD_DEC_CMD_CFG = CMD_MODULE_CODEC | CMD_CTX_ID_DEC | CMD_DEC_CFG,
MPP_DEC_SET_CFG, /* set MppDecCfg structure */
MPP_DEC_GET_CFG, /* get MppDecCfg structure */
MPP_DEC_CMD_END,
MPP_ENC_CMD_BASE = CMD_MODULE_CODEC | CMD_CTX_ID_ENC,
/* basic encoder setup control */
MPP_ENC_SET_CFG, /* set MppEncCfg structure */
MPP_ENC_GET_CFG, /* get MppEncCfg structure */
MPP_ENC_SET_PREP_CFG, /* deprecated set MppEncPrepCfg structure, use MPP_ENC_SET_CFG instead */
MPP_ENC_GET_PREP_CFG, /* deprecated get MppEncPrepCfg structure, use MPP_ENC_GET_CFG instead */
MPP_ENC_SET_RC_CFG, /* deprecated set MppEncRcCfg structure, use MPP_ENC_SET_CFG instead */
MPP_ENC_GET_RC_CFG, /* deprecated get MppEncRcCfg structure, use MPP_ENC_GET_CFG instead */
MPP_ENC_SET_CODEC_CFG, /* deprecated set MppEncCodecCfg structure, use MPP_ENC_SET_CFG instead */
MPP_ENC_GET_CODEC_CFG, /* deprecated get MppEncCodecCfg structure, use MPP_ENC_GET_CFG instead */
/* runtime encoder setup control */
MPP_ENC_SET_IDR_FRAME, /* next frame will be encoded as intra frame */
MPP_ENC_SET_OSD_LEGACY_0, /* deprecated */
MPP_ENC_SET_OSD_LEGACY_1, /* deprecated */
MPP_ENC_SET_OSD_LEGACY_2, /* deprecated */
MPP_ENC_GET_HDR_SYNC, /* get vps / sps / pps which has better sync behavior parameter is MppPacket */
MPP_ENC_GET_EXTRA_INFO, /* deprecated */
    MPP_ENC_SET_SEI_CFG,               /* SEI: Supplemental Enhancement Information, parameter is MppSeiMode */
    MPP_ENC_GET_SEI_DATA,              /* SEI: Supplemental Enhancement Information, parameter is MppPacket */
MPP_ENC_PRE_ALLOC_BUFF, /* deprecated */
MPP_ENC_SET_QP_RANGE, /* used for adjusting qp range, the parameter can be 1 or 2 */
MPP_ENC_SET_ROI_CFG, /* set MppEncROICfg structure */
MPP_ENC_SET_CTU_QP, /* for H265 Encoder,set CTU's size and QP */
MPP_ENC_CMD_QUERY = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_QUERY,
/* query encoder runtime information for encode stage */
MPP_ENC_QUERY, /* set and get MppEncQueryCfg structure */
    /* User-defined rate control strategy API control */
MPP_ENC_CFG_RC_API = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_CFG_RC_API,
/*
* Get RcApiQueryAll structure
 * Get all available rate control strategy strings and count
*/
MPP_ENC_GET_RC_API_ALL = MPP_ENC_CFG_RC_API + 1,
/*
* Get RcApiQueryType structure
 * Get available rate control strategy strings of a certain type
*/
MPP_ENC_GET_RC_API_BY_TYPE = MPP_ENC_CFG_RC_API + 2,
/*
* Set RcImplApi structure
 * Add new or update rate control strategy function pointers
*/
MPP_ENC_SET_RC_API_CFG = MPP_ENC_CFG_RC_API + 3,
/*
* Get RcApiBrief structure
 * Get the currently used rate control strategy brief information (type and name)
*/
MPP_ENC_GET_RC_API_CURRENT = MPP_ENC_CFG_RC_API + 4,
/*
* Set RcApiBrief structure
 * Set the currently used rate control strategy brief information (type and name)
*/
MPP_ENC_SET_RC_API_CURRENT = MPP_ENC_CFG_RC_API + 5,
MPP_ENC_CFG_MISC = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_CFG_MISC,
MPP_ENC_SET_HEADER_MODE, /* set MppEncHeaderMode */
MPP_ENC_GET_HEADER_MODE, /* get MppEncHeaderMode */
MPP_ENC_CFG_SPLIT = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_CFG_SPLIT,
MPP_ENC_SET_SPLIT, /* set MppEncSliceSplit structure */
MPP_ENC_GET_SPLIT, /* get MppEncSliceSplit structure */
MPP_ENC_CFG_REF = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_CFG_REF,
MPP_ENC_SET_REF_CFG, /* set MppEncRefCfg structure */
MPP_ENC_CFG_OSD = CMD_MODULE_CODEC | CMD_CTX_ID_ENC | CMD_ENC_CFG_OSD,
MPP_ENC_SET_OSD_PLT_CFG, /* set OSD palette, parameter should be pointer to MppEncOSDPltCfg */
MPP_ENC_GET_OSD_PLT_CFG, /* get OSD palette, parameter should be pointer to MppEncOSDPltCfg */
MPP_ENC_SET_OSD_DATA_CFG, /* set OSD data with at most 8 regions, parameter should be pointer to MppEncOSDData */
MPP_ENC_CMD_END,
MPP_ISP_CMD_BASE = CMD_MODULE_CODEC | CMD_CTX_ID_ISP,
MPP_ISP_CMD_END,
MPP_HAL_CMD_BASE = CMD_MODULE_HAL,
MPP_HAL_CMD_END,
MPI_CMD_BUTT,
} MpiCmd;
#include "rk_vdec_cmd.h"
#include "rk_vdec_cfg.h"
#include "rk_venc_cmd.h"
#include "rk_venc_cfg.h"
#include "rk_venc_ref.h"
#endif /*__RK_MPI_CMD_H__*/
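
A sketch of the control interface with one of the decoder commands above (hypothetical wrapper; the split-parser flag is an RK_U32 and must be set before mpp_init() per the comment above):

/* cmd_demo.c - hypothetical control() call with a decoder command */
#include "rk_mpi.h"

static MPP_RET enable_split_parser(MppCtx ctx, MppApi *mpi)
{
    RK_U32 need_split = 1;

    return mpi->control(ctx, MPP_DEC_SET_PARSER_SPLIT_MODE, &need_split);
}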

View File

@ -0,0 +1,142 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_TYPE_H__
#define __RK_TYPE_H__
#include <stddef.h>
#if defined(_WIN32) && !defined(__MINGW32CE__)
typedef unsigned char RK_U8;
typedef unsigned short RK_U16;
typedef unsigned int RK_U32;
typedef unsigned long RK_ULONG;
typedef unsigned __int64 RK_U64;
typedef signed char RK_S8;
typedef signed short RK_S16;
typedef signed int RK_S32;
typedef signed long RK_LONG;
typedef signed __int64 RK_S64;
#else
typedef unsigned char RK_U8;
typedef unsigned short RK_U16;
typedef unsigned int RK_U32;
typedef unsigned long RK_ULONG;
typedef unsigned long long int RK_U64;
typedef signed char RK_S8;
typedef signed short RK_S16;
typedef signed int RK_S32;
typedef signed long RK_LONG;
typedef signed long long int RK_S64;
#endif
#ifndef MODULE_TAG
#define MODULE_TAG NULL
#endif
/**
* @ingroup rk_mpi
* @brief The type of mpp context
 * @details This type is used when calling mpp_init(); it covers decoder,
 *          encoder and Image Signal Process (ISP). So far decoder and encoder
 *          are fully supported, and ISP will be supported in the future.
*/
typedef enum {
MPP_CTX_DEC, /**< decoder */
MPP_CTX_ENC, /**< encoder */
MPP_CTX_ISP, /**< isp */
MPP_CTX_BUTT, /**< undefined */
} MppCtxType;
/**
* @ingroup rk_mpi
* @brief Enumeration used to define the possible video compression codings.
* sync with the omx_video.h
*
* @note This essentially refers to file extensions. If the coding is
* being used to specify the ENCODE type, then additional work
* must be done to configure the exact flavor of the compression
* to be used. For decode cases where the user application can
* not differentiate between MPEG-4 and H.264 bit streams, it is
* up to the codec to handle this.
*/
typedef enum {
MPP_VIDEO_CodingUnused, /**< Value when coding is N/A */
MPP_VIDEO_CodingAutoDetect, /**< Autodetection of coding type */
MPP_VIDEO_CodingMPEG2, /**< AKA: H.262 */
MPP_VIDEO_CodingH263, /**< H.263 */
MPP_VIDEO_CodingMPEG4, /**< MPEG-4 */
MPP_VIDEO_CodingWMV, /**< Windows Media Video (WMV1,WMV2,WMV3)*/
MPP_VIDEO_CodingRV, /**< all versions of Real Video */
MPP_VIDEO_CodingAVC, /**< H.264/AVC */
MPP_VIDEO_CodingMJPEG, /**< Motion JPEG */
MPP_VIDEO_CodingVP8, /**< VP8 */
MPP_VIDEO_CodingVP9, /**< VP9 */
MPP_VIDEO_CodingVC1 = 0x01000000, /**< Windows Media Video (WMV1,WMV2,WMV3)*/
MPP_VIDEO_CodingFLV1, /**< Sorenson H.263 */
MPP_VIDEO_CodingDIVX3, /**< DIVX3 */
MPP_VIDEO_CodingVP6,
MPP_VIDEO_CodingHEVC, /**< H.265/HEVC */
MPP_VIDEO_CodingAVSPLUS, /**< AVS+ */
MPP_VIDEO_CodingAVS, /**< AVS profile=0x20 */
MPP_VIDEO_CodingAVS2, /**< AVS2 */
MPP_VIDEO_CodingAV1, /**< av1 */
MPP_VIDEO_CodingKhronosExtensions = 0x6F000000, /**< Reserved region for introducing Khronos Standard Extensions */
MPP_VIDEO_CodingVendorStartUnused = 0x7F000000, /**< Reserved region for introducing Vendor Extensions */
MPP_VIDEO_CodingMax = 0x7FFFFFFF
} MppCodingType;
/*
* All external interface object list here.
* The interface object is defined as void * for expandability
* The cross include between these objects will introduce extra
* compiling difficulty. So we move them together in this header.
*
* Object interface header list:
*
* MppCtx - rk_mpi.h
* MppParam - rk_mpi.h
*
* MppFrame - mpp_frame.h
* MppPacket - mpp_packet.h
*
* MppBuffer - mpp_buffer.h
* MppBufferGroup - mpp_buffer.h
*
* MppTask - mpp_task.h
* MppMeta - mpp_meta.h
*/
typedef void* MppCtx;
typedef void* MppParam;
typedef void* MppFrame;
typedef void* MppPacket;
typedef void* MppBuffer;
typedef void* MppBufferGroup;
typedef void* MppTask;
typedef void* MppMeta;
#endif /*__RK_TYPE_H__*/

View File

@ -0,0 +1,50 @@
/*
* Copyright 2020 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_VDEC_CFG_H__
#define __RK_VDEC_CFG_H__
#include "rk_type.h"
#include "mpp_err.h"
typedef void* MppDecCfg;
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET mpp_dec_cfg_init(MppDecCfg *cfg);
MPP_RET mpp_dec_cfg_deinit(MppDecCfg cfg);
MPP_RET mpp_dec_cfg_set_s32(MppDecCfg cfg, const char *name, RK_S32 val);
MPP_RET mpp_dec_cfg_set_u32(MppDecCfg cfg, const char *name, RK_U32 val);
MPP_RET mpp_dec_cfg_set_s64(MppDecCfg cfg, const char *name, RK_S64 val);
MPP_RET mpp_dec_cfg_set_u64(MppDecCfg cfg, const char *name, RK_U64 val);
MPP_RET mpp_dec_cfg_set_ptr(MppDecCfg cfg, const char *name, void *val);
MPP_RET mpp_dec_cfg_get_s32(MppDecCfg cfg, const char *name, RK_S32 *val);
MPP_RET mpp_dec_cfg_get_u32(MppDecCfg cfg, const char *name, RK_U32 *val);
MPP_RET mpp_dec_cfg_get_s64(MppDecCfg cfg, const char *name, RK_S64 *val);
MPP_RET mpp_dec_cfg_get_u64(MppDecCfg cfg, const char *name, RK_U64 *val);
MPP_RET mpp_dec_cfg_get_ptr(MppDecCfg cfg, const char *name, void **val);
void mpp_dec_cfg_show(void);
#ifdef __cplusplus
}
#endif
#endif /*__RK_VDEC_CFG_H__*/
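For orientation, a minimal usage sketch of this string-keyed config API (an editorial addition, not part of the header). The key name used below is an assumption for illustration; the real key set can be inspected with mpp_dec_cfg_show().

#include "rk_vdec_cfg.h"

static MPP_RET demo_dec_cfg(void)
{
    MppDecCfg cfg = NULL;
    MPP_RET ret = mpp_dec_cfg_init(&cfg);
    if (ret)
        return ret;

    /* "base:split_parse" is an assumed key name for illustration */
    ret = mpp_dec_cfg_set_u32(cfg, "base:split_parse", 1);

    mpp_dec_cfg_deinit(cfg);
    return ret;
}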

View File

@ -0,0 +1,69 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_VDEC_CMD_H__
#define __RK_VDEC_CMD_H__
#include "rk_type.h"
#include "mpp_err.h"
/*
* decoder query interface is only for debug usage
*/
#define MPP_DEC_QUERY_STATUS (0x00000001)
#define MPP_DEC_QUERY_WAIT (0x00000002)
#define MPP_DEC_QUERY_FPS (0x00000004)
#define MPP_DEC_QUERY_BPS (0x00000008)
#define MPP_DEC_QUERY_DEC_IN_PKT (0x00000010)
#define MPP_DEC_QUERY_DEC_WORK (0x00000020)
#define MPP_DEC_QUERY_DEC_OUT_FRM (0x00000040)
#define MPP_DEC_QUERY_ALL (MPP_DEC_QUERY_STATUS | \
MPP_DEC_QUERY_WAIT | \
MPP_DEC_QUERY_FPS | \
MPP_DEC_QUERY_BPS | \
MPP_DEC_QUERY_DEC_IN_PKT | \
MPP_DEC_QUERY_DEC_WORK | \
MPP_DEC_QUERY_DEC_OUT_FRM)
typedef struct MppDecQueryCfg_t {
/*
* 32 bit query flag for query data check
* Each bit represent a query data switch.
* bit 0 - for querying decoder runtime status
* bit 1 - for querying decoder runtime waiting status
* bit 2 - for querying decoder realtime decode fps
* bit 3 - for querying decoder realtime input bps
* bit 4 - for querying decoder input packet count
* bit 5 - for querying decoder start hardware times
* bit 6 - for querying decoder output frame count
*/
RK_U32 query_flag;
/* 64 bit query data output */
RK_U32 rt_status;
RK_U32 rt_wait;
RK_U32 rt_fps;
RK_U32 rt_bps;
RK_U32 dec_in_pkt_cnt;
RK_U32 dec_hw_run_cnt;
RK_U32 dec_out_frm_cnt;
} MppDecQueryCfg;
typedef void* MppExtCbCtx;
typedef MPP_RET (*MppExtCbFunc)(MppExtCbCtx cb_ctx, MppCtx mpp, RK_S32 cmd, void *arg);
#endif /*__RK_VDEC_CMD_H__*/
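A hedged sketch of filling the query struct above. The control call that consumes it lives elsewhere in the MPI and is assumed here; only symbols from this header (plus memset) are used.

#include <string.h>
#include "rk_vdec_cmd.h"

static void demo_query_setup(MppDecQueryCfg *query)
{
    memset(query, 0, sizeof(*query));              /* clear all output fields    */
    query->query_flag = MPP_DEC_QUERY_FPS |        /* bit 2: realtime decode fps */
                        MPP_DEC_QUERY_DEC_OUT_FRM; /* bit 6: output frame count  */
    /* pass the struct through the decoder control interface (assumed to be
     * defined elsewhere in the MPI), then read query->rt_fps and
     * query->dec_out_frm_cnt */
}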

View File

@ -0,0 +1,52 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_VENC_CFG_H__
#define __RK_VENC_CFG_H__
#include "rk_type.h"
#include "mpp_err.h"
typedef void* MppEncCfg;
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET mpp_enc_cfg_init(MppEncCfg *cfg);
MPP_RET mpp_enc_cfg_deinit(MppEncCfg cfg);
MPP_RET mpp_enc_cfg_set_s32(MppEncCfg cfg, const char *name, RK_S32 val);
MPP_RET mpp_enc_cfg_set_u32(MppEncCfg cfg, const char *name, RK_U32 val);
MPP_RET mpp_enc_cfg_set_s64(MppEncCfg cfg, const char *name, RK_S64 val);
MPP_RET mpp_enc_cfg_set_u64(MppEncCfg cfg, const char *name, RK_U64 val);
MPP_RET mpp_enc_cfg_set_ptr(MppEncCfg cfg, const char *name, void *val);
MPP_RET mpp_enc_cfg_set_st(MppEncCfg cfg, const char *name, void *val);
MPP_RET mpp_enc_cfg_get_s32(MppEncCfg cfg, const char *name, RK_S32 *val);
MPP_RET mpp_enc_cfg_get_u32(MppEncCfg cfg, const char *name, RK_U32 *val);
MPP_RET mpp_enc_cfg_get_s64(MppEncCfg cfg, const char *name, RK_S64 *val);
MPP_RET mpp_enc_cfg_get_u64(MppEncCfg cfg, const char *name, RK_U64 *val);
MPP_RET mpp_enc_cfg_get_ptr(MppEncCfg cfg, const char *name, void **val);
MPP_RET mpp_enc_cfg_get_st(MppEncCfg cfg, const char *name, void *val);
void mpp_enc_cfg_show(void);
#ifdef __cplusplus
}
#endif
#endif /*__RK_VENC_CFG_H__*/
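As with the decoder config, a brief hedged sketch of the set call pattern. The "prep:width" / "prep:height" key names are assumptions chosen to illustrate the string-keyed API; mpp_enc_cfg_show() lists the real keys.

#include "rk_venc_cfg.h"

static MPP_RET demo_enc_cfg(RK_S32 width, RK_S32 height)
{
    MppEncCfg cfg = NULL;
    MPP_RET ret = mpp_enc_cfg_init(&cfg);
    if (ret)
        return ret;

    /* assumed key names for illustration */
    ret = mpp_enc_cfg_set_s32(cfg, "prep:width", width);
    if (!ret)
        ret = mpp_enc_cfg_set_s32(cfg, "prep:height", height);

    mpp_enc_cfg_deinit(cfg);
    return ret;
}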

File diff suppressed because it is too large

View File

@ -0,0 +1,66 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_VENC_RC_H__
#define __RK_VENC_RC_H__
#include "rk_type.h"
#define MPP_ENC_MIN_BPS (SZ_1K)
#define MPP_ENC_MAX_BPS (SZ_1M * 200)
/* Rate control parameter */
typedef enum MppEncRcMode_e {
MPP_ENC_RC_MODE_VBR,
MPP_ENC_RC_MODE_CBR,
MPP_ENC_RC_MODE_FIXQP,
MPP_ENC_RC_MODE_AVBR,
MPP_ENC_RC_MODE_BUTT
} MppEncRcMode;
typedef enum MppEncRcPriority_e {
MPP_ENC_RC_BY_BITRATE_FIRST,
MPP_ENC_RC_BY_FRM_SIZE_FIRST,
MPP_ENC_RC_PRIORITY_BUTT
} MppEncRcPriority;
typedef enum MppEncRcDropFrmMode_e {
MPP_ENC_RC_DROP_FRM_DISABLED,
MPP_ENC_RC_DROP_FRM_NORMAL,
MPP_ENC_RC_DROP_FRM_PSKIP,
MPP_ENC_RC_DROP_FRM_BUTT
} MppEncRcDropFrmMode;
typedef enum MppEncRcSuperFrameMode_t {
MPP_ENC_RC_SUPER_FRM_NONE,
MPP_ENC_RC_SUPER_FRM_DROP,
MPP_ENC_RC_SUPER_FRM_REENC,
MPP_ENC_RC_SUPER_FRM_BUTT
} MppEncRcSuperFrameMode;
typedef enum MppEncRcGopMode_e {
MPP_ENC_RC_NORMAL_P,
MPP_ENC_RC_SMART_P,
MPP_ENC_RC_GOP_MODE_BUTT,
} MppEncRcGopMode;
typedef enum MppEncRcIntraRefreshMode_e {
MPP_ENC_RC_INTRA_REFRESH_ROW = 0,
MPP_ENC_RC_INTRA_REFRESH_COL,
MPP_ENC_RC_INTRA_REFRESH_BUTT
} MppEncRcRefreshMode;
#endif /*__RK_VENC_RC_H__*/
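The rate-control enums above are normally pushed into an MppEncCfg from the previous header. A hedged sketch under the assumption that "rc:mode" and "rc:bps_target" are valid config key names:

#include "rk_venc_cfg.h"
#include "rk_venc_rc.h"

static MPP_RET demo_rc_cbr(MppEncCfg cfg, RK_S32 bps)
{
    /* select constant-bitrate mode, then the target bitrate */
    MPP_RET ret = mpp_enc_cfg_set_s32(cfg, "rc:mode", MPP_ENC_RC_MODE_CBR);
    if (!ret)
        ret = mpp_enc_cfg_set_s32(cfg, "rc:bps_target", bps);
    return ret;  /* bps should fall within [MPP_ENC_MIN_BPS, MPP_ENC_MAX_BPS] */
}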

View File

@ -0,0 +1,242 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RK_VENC_REF_H__
#define __RK_VENC_REF_H__
#include "rk_type.h"
#include "mpp_err.h"
/*
 * The MPP reference management system follows the model of H.264/H.265
 * reference frame management.
 *
 * Reference frames come in two types: long-term reference frames and
 * short-term reference frames (lt_ref and st_ref).
 *
 * The lt_ref can only be indexed by the long-term reference frame index (lt_idx).
 * The st_ref can be indexed by its temporal id (tid) and previous count.
 *
 * MppEncRefMode defines the way for the user to reference the required frame.
 *
 * Normal reference mode without argument
 * REF_TO_PREV_REF_FRM      - refer to previous reference frame in encode order (no matter Lt or St)
 * REF_TO_PREV_ST_REF       - refer to previous short-term reference frame
 * REF_TO_PREV_LT_REF       - refer to previous long-term reference frame
 * REF_TO_PREV_INTRA        - refer to previous Intra / IDR frame
 * REF_TO_ST_REF_SETUP      - refer to the reference frame defined in StRefSetup
 *
 * Normal reference mode with argument
 * REF_TO_TEMPORAL_LAYER    - refer to previous reference frame with temporal id argument
 * REF_TO_LT_REF_IDX        - refer to long-term reference frame with lt_ref_idx argument
 * REF_TO_ST_PREV_N_REF     - refer to short-term reference frame with diff frame_num argument
 *
 * Long-term reference only mode
 * REF_TO_ST_REF_SETUP      - use corresponding mode of original short-term reference frame
 *
 * Short-term reference only mode
 * REF_TO_LT_REF_SETUP      - indicates that this frame will be overwritten by the long-term config
 *
 * By combining frames with these modes the user can define many kinds of reference
 * hierarchy structures. Normally, though, the user should use a simplified preset
 * hierarchy pattern.
 *
 * The rules for virtual cpb management are similar to H.264/H.265:
 * 1. When a frame is marked as a long-term reference frame it is kept in the cpb until
 *    it is replaced by another frame with the same lt_idx or by an IDR frame.
 * 2. When a frame is marked as a short-term reference frame it is inserted into the cpb
 *    when there is enough storage space. When the total number of long-term and short-term
 *    reference frames exceeds the cpb size limit, the oldest short-term frame is removed.
 *    This is called the sliding window in H.264.
*/
/* max 4 temporal layers */
#define MPP_ENC_MAX_TEMPORAL_LAYER_NUM      4
/* max 16 long-term reference frames */
#define MPP_ENC_MAX_LT_REF_NUM              16
/*
* Group Of Picture (GOP) config is separated into three parts:
*
* 1. Intra / IDR frame config
 *    igop   - the interval between two intra / IDR frames
*
* 2. Long-term reference config (MppEncRefLtFrmCfg)
*
 *    Set up the max long-term reference index lt_idx, loop interval and reference
 *    mode for automatic long-term reference frame generation. The encoder will
 *    mark frames as long-term reference frames at the given interval.
*
 *    2.1 lt_idx
 *    The long-term reference frame index is a unique identifier for a long-term
 *    reference frame.
 *    The max long-term reference frame index should NOT be larger than
 *    max_num_ref_frames in the sps.
 *
 *    2.2 lt_gap
 *    When lt_gap is zero, long-term reference frame generation is disabled.
 *    When lt_gap is non-zero (usually a 2~3 second interval) the long-term
 *    reference frame will be generated for error recovery or smart hierarchy.
 *
 *    2.3 lt_delay
 *    The lt_delay is the delay time for generating the long-term reference frame.
 *    The start point of lt_delay is the IDR/intra frame generated by igop.
*
 *    2.4 ref_mode: long-term reference frame reference mode
* NOTE: temporal id of longterm reference frame is always zero.
*
* Examples:
 *    The sequence has only one lt_ref (0) and sets up one long-term reference
 *    frame every 300 frames.
* {
* .lt_idx = 0,
* .lt_gap = 300,
* .lt_delay = 0,
* }
* result:
* frame 0 ...... 299 300 301 ...... 599 600 601
* lt_idx 0 xxxxxx x 0 x xxxxxx x 0 x
*
 *    The sequence has lt_ref 0 to 2 and sets up a long-term reference frame
 *    every 100 frames.
* {
* .lt_idx = 0,
* .lt_gap = 300,
* .lt_delay = 0,
* }
* {
* .lt_idx = 1,
* .lt_gap = 300,
* .lt_delay = 100,
* }
* {
* .lt_idx = 2,
* .lt_gap = 300,
* .lt_delay = 200,
* }
* result:
* frame 0 ... 99 100 101 ... 199 200 201 ... 299 300 301
* lt_idx 0 xxx x 1 x xxx x 2 x xxx x 0 x
*
* 3. Short-term reference config (MppEncStRefSetup)
*
 *    3.1 is_non_ref
 *    The is_non_ref indicates whether the current frame is a reference frame or not.
 *
 *    3.2 temporal_id
 *    The temporal id of the current frame config.
 *
 *    3.3 ref_mode: short-term reference frame reference mode
 *
 *    3.4 repeat
 *    The repeat count of the short-term reference frame config.
 *    The overall frame count with the same config is repeat + 1.
*
* Examples:
*
*/
#define REF_MODE_MODE_MASK (0x1F)
#define REF_MODE_ARG_MASK (0xFFFF0000)
typedef enum MppEncRefMode_e {
/* max 32 mode in 32-bit */
/* for default ref global config */
REF_MODE_GLOBAL,
REF_TO_PREV_REF_FRM = REF_MODE_GLOBAL,
REF_TO_PREV_ST_REF,
REF_TO_PREV_LT_REF,
REF_TO_PREV_INTRA,
/* for global config with args */
REF_MODE_GLOBAL_WITH_ARG = 0x4,
/* with ref arg as temporal layer id */
REF_TO_TEMPORAL_LAYER = REF_MODE_GLOBAL_WITH_ARG,
/* with ref arg as long-term reference picture index */
REF_TO_LT_REF_IDX,
/* with ref arg as short-term reference picture difference frame_num */
REF_TO_ST_PREV_N_REF,
REF_MODE_GLOBAL_BUTT,
/* for lt-ref */
REF_MODE_LT = 0x18,
REF_TO_ST_REF_SETUP,
REF_MODE_LT_BUTT,
/* for st-ref */
REF_MODE_ST = 0x1C,
REF_TO_LT_REF_SETUP,
REF_MODE_ST_BUTT,
} MppEncRefMode;
typedef struct MppEncRefLtFrmCfg_t {
RK_S32 lt_idx; /* lt_idx of the reference frame */
RK_S32 temporal_id; /* temporal_id of the reference frame */
MppEncRefMode ref_mode;
RK_S32 ref_arg;
RK_S32 lt_gap; /* gap between two lt-ref with same lt_idx */
RK_S32 lt_delay; /* delay offset to igop start frame */
} MppEncRefLtFrmCfg;
typedef struct MppEncRefStFrmCfg_t {
RK_S32 is_non_ref;
RK_S32 temporal_id;
MppEncRefMode ref_mode;
RK_S32 ref_arg;
RK_S32 repeat; /* repeat times */
} MppEncRefStFrmCfg;
typedef struct MppEncRefPreset_t {
/* input parameter for query */
const char *name;
RK_S32 max_lt_cnt;
RK_S32 max_st_cnt;
MppEncRefLtFrmCfg *lt_cfg;
MppEncRefStFrmCfg *st_cfg;
/* output parameter */
RK_S32 lt_cnt;
RK_S32 st_cnt;
} MppEncRefPreset;
typedef void* MppEncRefCfg;
#ifdef __cplusplus
extern "C" {
#endif
MPP_RET mpp_enc_ref_cfg_init(MppEncRefCfg *ref);
MPP_RET mpp_enc_ref_cfg_deinit(MppEncRefCfg *ref);
MPP_RET mpp_enc_ref_cfg_reset(MppEncRefCfg ref);
MPP_RET mpp_enc_ref_cfg_set_cfg_cnt(MppEncRefCfg ref, RK_S32 lt_cnt, RK_S32 st_cnt);
MPP_RET mpp_enc_ref_cfg_add_lt_cfg(MppEncRefCfg ref, RK_S32 cnt, MppEncRefLtFrmCfg *frm);
MPP_RET mpp_enc_ref_cfg_add_st_cfg(MppEncRefCfg ref, RK_S32 cnt, MppEncRefStFrmCfg *frm);
MPP_RET mpp_enc_ref_cfg_check(MppEncRefCfg ref);
/*
 * A new reference config will restart a new gop and clear the cpb by default.
 * The keep-cpb function lets the encoder keep the current cpb status and NOT
 * reset all the reference frames in the cpb.
*/
MPP_RET mpp_enc_ref_cfg_set_keep_cpb(MppEncRefCfg ref, RK_S32 keep);
MPP_RET mpp_enc_ref_cfg_get_preset(MppEncRefPreset *preset);
MPP_RET mpp_enc_ref_cfg_show(MppEncRefCfg ref);
#ifdef __cplusplus
}
#endif
#endif /*__RK_VENC_REF_H__*/
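A hedged sketch tying the structs and API above to the first long-term example in the comment (one lt_ref with a 300-frame gap), paired with a plain single-layer short-term config. Field values are illustrative assumptions:

#include "rk_venc_ref.h"

static MPP_RET demo_ref_cfg(MppEncRefCfg *out)
{
    MppEncRefLtFrmCfg lt = {
        .lt_idx   = 0,
        .lt_gap   = 300,                    /* one lt-ref every 300 frames */
        .lt_delay = 0,
        .ref_mode = REF_TO_PREV_INTRA,
    };
    MppEncRefStFrmCfg st = {
        .is_non_ref  = 0,
        .temporal_id = 0,
        .ref_mode    = REF_TO_PREV_REF_FRM,
        .repeat      = 0,
    };
    MppEncRefCfg ref = NULL;
    MPP_RET ret = mpp_enc_ref_cfg_init(&ref);

    if (!ret)
        ret = mpp_enc_ref_cfg_set_cfg_cnt(ref, 1, 1);   /* 1 lt + 1 st entry */
    if (!ret)
        ret = mpp_enc_ref_cfg_add_lt_cfg(ref, 1, &lt);
    if (!ret)
        ret = mpp_enc_ref_cfg_add_st_cfg(ref, 1, &st);
    if (!ret)
        ret = mpp_enc_ref_cfg_check(ref);               /* validate the set */

    *out = ref;
    return ret;
}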

View File

@ -0,0 +1,123 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __VPU_H__
#define __VPU_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "rk_type.h"
#define VPU_SUCCESS (0)
#define VPU_FAILURE (-1)
#define VPU_HW_WAIT_OK VPU_SUCCESS
#define VPU_HW_WAIT_ERROR VPU_FAILURE
#define VPU_HW_WAIT_TIMEOUT 1
// vpu decoder 60 registers, size 240B
#define VPU_REG_NUM_DEC (60)
// vpu post processor 41 registers, size 164B
#define VPU_REG_NUM_PP (41)
// vpu decoder + post processor 101 registers, size 404B
#define VPU_REG_NUM_DEC_PP (VPU_REG_NUM_DEC+VPU_REG_NUM_PP)
// vpu encoder 96 registers, size 384B
#define VPU_REG_NUM_ENC (96)
typedef enum {
VPU_ENC = 0x0,
VPU_DEC = 0x1,
VPU_PP = 0x2,
VPU_DEC_PP = 0x3,
VPU_DEC_HEVC = 0x4,
VPU_DEC_RKV = 0x5,
VPU_ENC_RKV = 0x6,
VPU_DEC_AVSPLUS = 0x7,
VPU_ENC_VEPU22 = 0x8,
VPU_TYPE_BUTT ,
} VPU_CLIENT_TYPE;
/* Hardware decoder configuration description */
typedef struct VPUHwDecConfig {
RK_U32 maxDecPicWidth; /* Maximum video decoding width supported */
RK_U32 maxPpOutPicWidth; /* Maximum output width of Post-Processor */
RK_U32 h264Support; /* HW supports h.264 */
RK_U32 jpegSupport; /* HW supports JPEG */
RK_U32 mpeg4Support; /* HW supports MPEG-4 */
RK_U32 customMpeg4Support; /* HW supports custom MPEG-4 features */
RK_U32 vc1Support; /* HW supports VC-1 Simple */
RK_U32 mpeg2Support; /* HW supports MPEG-2 */
RK_U32 ppSupport; /* HW supports post-processor */
RK_U32 ppConfig; /* HW post-processor functions bitmask */
RK_U32 sorensonSparkSupport; /* HW supports Sorenson Spark */
RK_U32 refBufSupport; /* HW supports reference picture buffering */
RK_U32 vp6Support; /* HW supports VP6 */
RK_U32 vp7Support; /* HW supports VP7 */
RK_U32 vp8Support; /* HW supports VP8 */
RK_U32 avsSupport; /* HW supports AVS */
RK_U32 jpegESupport; /* HW supports JPEG extensions */
RK_U32 rvSupport; /* HW supports REAL */
RK_U32 mvcSupport; /* HW supports H264 MVC extension */
} VPUHwDecConfig_t;
/* Hardware encoder configuration description */
typedef struct VPUHwEndConfig {
RK_U32 maxEncodedWidth; /* Maximum supported width for video encoding (not JPEG) */
RK_U32 h264Enabled; /* HW supports H.264 */
RK_U32 jpegEnabled; /* HW supports JPEG */
RK_U32 mpeg4Enabled; /* HW supports MPEG-4 */
RK_U32 vsEnabled; /* HW supports video stabilization */
RK_U32 rgbEnabled; /* HW supports RGB input */
RK_U32 reg_size; /* HW bus type in use */
RK_U32 reserv[2];
} VPUHwEncConfig_t;
typedef enum {
// common command
VPU_CMD_REGISTER ,
VPU_CMD_REGISTER_ACK_OK ,
VPU_CMD_REGISTER_ACK_FAIL ,
VPU_CMD_UNREGISTER ,
VPU_SEND_CONFIG ,
VPU_SEND_CONFIG_ACK_OK ,
VPU_SEND_CONFIG_ACK_FAIL ,
VPU_GET_HW_INFO ,
VPU_GET_HW_INFO_ACK_OK ,
VPU_GET_HW_INFO_ACK_FAIL ,
VPU_CMD_BUTT ,
} VPU_CMD_TYPE;
int VPUClientInit(VPU_CLIENT_TYPE type);
RK_S32 VPUClientRelease(int socket);
RK_S32 VPUClientSendReg(int socket, RK_U32 *regs, RK_U32 nregs);
RK_S32 VPUClientSendReg2(RK_S32 socket, RK_S32 offset, RK_S32 size, void *param);
RK_S32 VPUClientWaitResult(int socket, RK_U32 *regs, RK_U32 nregs, VPU_CMD_TYPE *cmd, RK_S32 *len);
RK_S32 VPUClientGetHwCfg(int socket, RK_U32 *cfg, RK_U32 cfg_size);
RK_S32 VPUClientGetIOMMUStatus();
RK_U32 VPUCheckSupportWidth();
#ifdef __cplusplus
}
#endif
#endif /* __VPU_H__ */
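A hedged sketch of the socket-style client lifecycle declared above. The register contents are hardware-specific and left to the caller; a negative return from VPUClientInit is assumed to indicate failure.

#include "vpu.h"

static RK_S32 demo_vpu_decode_once(RK_U32 *regs /* VPU_REG_NUM_DEC words */)
{
    VPU_CMD_TYPE cmd;
    RK_S32 len;
    int socket = VPUClientInit(VPU_DEC);   /* open a decoder client */
    if (socket < 0)
        return VPU_FAILURE;

    /* hand the prepared register set to hardware and wait for completion */
    if (VPUClientSendReg(socket, regs, VPU_REG_NUM_DEC) == VPU_SUCCESS)
        VPUClientWaitResult(socket, regs, VPU_REG_NUM_DEC, &cmd, &len);

    return VPUClientRelease(socket);
}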

View File

@ -0,0 +1,504 @@
/*
* Copyright 2015 Rockchip Electronics Co. LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __VPU_API_H__
#define __VPU_API_H__
#include "rk_type.h"
#include "mpp_err.h"
/**
* @brief rockchip media process interface
*/
#define VPU_API_NOPTS_VALUE (0x8000000000000000LL)
/*
* bit definition of ColorType in structure VPU_FRAME
*/
#define VPU_OUTPUT_FORMAT_TYPE_MASK (0x0000ffff)
#define VPU_OUTPUT_FORMAT_ARGB8888 (0x00000000)
#define VPU_OUTPUT_FORMAT_ABGR8888 (0x00000001)
#define VPU_OUTPUT_FORMAT_RGB888 (0x00000002)
#define VPU_OUTPUT_FORMAT_RGB565 (0x00000003)
#define VPU_OUTPUT_FORMAT_RGB555 (0x00000004)
#define VPU_OUTPUT_FORMAT_YUV420_SEMIPLANAR (0x00000005)
#define VPU_OUTPUT_FORMAT_YUV420_PLANAR (0x00000006)
#define VPU_OUTPUT_FORMAT_YUV422 (0x00000007)
#define VPU_OUTPUT_FORMAT_YUV444 (0x00000008)
#define VPU_OUTPUT_FORMAT_YCH420 (0x00000009)
#define VPU_OUTPUT_FORMAT_BIT_MASK (0x000f0000)
#define VPU_OUTPUT_FORMAT_BIT_8 (0x00000000)
#define VPU_OUTPUT_FORMAT_BIT_10 (0x00010000)
#define VPU_OUTPUT_FORMAT_BIT_12 (0x00020000)
#define VPU_OUTPUT_FORMAT_BIT_14 (0x00030000)
#define VPU_OUTPUT_FORMAT_BIT_16 (0x00040000)
#define VPU_OUTPUT_FORMAT_FBC_MASK (0x00f00000)
#define VPU_OUTPUT_FORMAT_FBC_AFBC_V1 (0x00100000)
#define VPU_OUTPUT_FORMAT_FBC_AFBC_V2 (0x00200000)
#define VPU_OUTPUT_FORMAT_DYNCRANGE_MASK (0x0f000000)
#define VPU_OUTPUT_FORMAT_DYNCRANGE_SDR (0x00000000)
#define VPU_OUTPUT_FORMAT_DYNCRANGE_HDR10 (0x01000000)
#define VPU_OUTPUT_FORMAT_DYNCRANGE_HDR_HLG (0x02000000)
#define VPU_OUTPUT_FORMAT_DYNCRANGE_HDR_DOLBY (0x03000000)
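/*
 * Editorial sketch (assumption: 'frame' is a filled VPU_FRAME, defined
 * further down in this header) showing how the packed ColorType word
 * decomposes under the masks above:
 *
 *     RK_U32 ct    = frame.ColorType;
 *     RK_U32 fmt   = ct & VPU_OUTPUT_FORMAT_TYPE_MASK;      // e.g. ..._YUV420_SEMIPLANAR
 *     RK_U32 depth = ct & VPU_OUTPUT_FORMAT_BIT_MASK;       // e.g. ..._BIT_10
 *     RK_U32 fbc   = ct & VPU_OUTPUT_FORMAT_FBC_MASK;       // AFBC v1 / v2 or 0
 *     RK_U32 range = ct & VPU_OUTPUT_FORMAT_DYNCRANGE_MASK; // SDR / HDR10 / HLG / Dolby
 */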
/**
* @brief input picture type
*/
typedef enum {
ENC_INPUT_YUV420_PLANAR = 0, /**< YYYY... UUUU... VVVV */
ENC_INPUT_YUV420_SEMIPLANAR = 1, /**< YYYY... UVUVUV... */
ENC_INPUT_YUV422_INTERLEAVED_YUYV = 2, /**< YUYVYUYV... */
ENC_INPUT_YUV422_INTERLEAVED_UYVY = 3, /**< UYVYUYVY... */
ENC_INPUT_RGB565 = 4, /**< 16-bit RGB */
ENC_INPUT_BGR565 = 5, /**< 16-bit RGB */
ENC_INPUT_RGB555 = 6, /**< 15-bit RGB */
ENC_INPUT_BGR555 = 7, /**< 15-bit RGB */
ENC_INPUT_RGB444 = 8, /**< 12-bit RGB */
ENC_INPUT_BGR444 = 9, /**< 12-bit RGB */
ENC_INPUT_RGB888 = 10, /**< 24-bit RGB */
ENC_INPUT_BGR888 = 11, /**< 24-bit RGB */
ENC_INPUT_RGB101010 = 12, /**< 30-bit RGB */
ENC_INPUT_BGR101010 = 13 /**< 30-bit RGB */
} EncInputPictureType;
typedef enum VPU_API_CMD {
VPU_API_ENC_SETCFG,
VPU_API_ENC_GETCFG,
VPU_API_ENC_SETFORMAT,
VPU_API_ENC_SETIDRFRAME,
VPU_API_ENABLE_DEINTERLACE,
VPU_API_SET_VPUMEM_CONTEXT,
VPU_API_USE_PRESENT_TIME_ORDER,
VPU_API_SET_DEFAULT_WIDTH_HEIGH,
VPU_API_SET_INFO_CHANGE,
VPU_API_USE_FAST_MODE,
VPU_API_DEC_GET_STREAM_COUNT,
VPU_API_GET_VPUMEM_USED_COUNT,
VPU_API_GET_FRAME_INFO,
VPU_API_SET_OUTPUT_BLOCK,
VPU_API_GET_EOS_STATUS,
VPU_API_SET_OUTPUT_MODE,
/* get sps/pps header */
VPU_API_GET_EXTRA_INFO = 0x200,
VPU_API_SET_IMMEDIATE_OUT = 0x1000,
VPU_API_SET_PARSER_SPLIT_MODE, /* NOTE: should control before init */
VPU_API_DEC_OUT_FRM_STRUCT_TYPE,
VPU_API_DEC_EN_THUMBNAIL,
VPU_API_DEC_EN_HDR_META,
VPU_API_DEC_EN_MVC,
VPU_API_DEC_EN_FBC_HDR_256_ODD,
VPU_API_ENC_VEPU22_START = 0x2000,
VPU_API_ENC_SET_VEPU22_CFG,
VPU_API_ENC_GET_VEPU22_CFG,
VPU_API_ENC_SET_VEPU22_CTU_QP,
VPU_API_ENC_SET_VEPU22_ROI,
VPU_API_ENC_MPP = 0x3000,
VPU_API_ENC_MPP_SETCFG,
VPU_API_ENC_MPP_GETCFG,
/* mlvec dynamic configure */
VPU_API_ENC_MLVEC_CFG = 0x4000,
VPU_API_ENC_SET_MAX_TID,
VPU_API_ENC_SET_MARK_LTR,
VPU_API_ENC_SET_USE_LTR,
VPU_API_ENC_SET_FRAME_QP,
VPU_API_ENC_SET_BASE_LAYER_PID,
} VPU_API_CMD;
typedef struct {
RK_U32 TimeLow;
RK_U32 TimeHigh;
} TIME_STAMP;
typedef struct {
RK_U32 CodecType;
RK_U32 ImgWidth;
RK_U32 ImgHeight;
RK_U32 ImgHorStride;
RK_U32 ImgVerStride;
RK_U32 BufSize;
} VPU_GENERIC;
typedef struct VPUMem {
RK_U32 phy_addr;
RK_U32 *vir_addr;
RK_U32 size;
RK_U32 *offset;
} VPUMemLinear_t;
typedef struct tVPU_FRAME {
RK_U32 FrameBusAddr[2]; // 0: Y address; 1: UV address;
RK_U32 FrameWidth; // buffer horizontal stride
RK_U32 FrameHeight; // buffer vertical stride
RK_U32 OutputWidth; // deprecated
RK_U32 OutputHeight; // deprecated
RK_U32 DisplayWidth; // valid width for display
RK_U32 DisplayHeight; // valid height for display
RK_U32 CodingType;
RK_U32 FrameType; // frame; top_field_first; bot_field_first
RK_U32 ColorType;
RK_U32 DecodeFrmNum;
TIME_STAMP ShowTime;
RK_U32 ErrorInfo; // error information
RK_U32 employ_cnt;
VPUMemLinear_t vpumem;
struct tVPU_FRAME *next_frame;
union {
struct {
RK_U32 Res0[2];
struct {
RK_U32 ColorPrimaries : 8;
RK_U32 ColorTransfer : 8;
RK_U32 ColorCoeffs : 8;
RK_U32 ColorRange : 1;
RK_U32 Res1 : 7;
};
RK_U32 Res2;
};
RK_U32 Res[4];
};
} VPU_FRAME;
typedef struct FrameThumbInfo {
RK_U32 enable;
RK_U32 yOffset;
RK_U32 uvOffset;
} FrameThumbInfo_t;
typedef struct FrameHdrInfo {
RK_U32 isHdr;
RK_U32 offset;
RK_U32 size;
} FrameHdrInfo_t;
typedef struct VideoFrame {
VPU_FRAME vpuFrame;
FrameThumbInfo_t thumbInfo;
FrameHdrInfo_t hdrInfo;
RK_U32 viewId;
RK_U32 reserved[16];
} VideoFrame_t;
typedef struct VideoPacket {
RK_S64 pts; /* with unit of us*/
RK_S64 dts; /* with unit of us*/
RK_U8 *data;
RK_S32 size;
RK_U32 capability;
RK_U32 nFlags;
} VideoPacket_t;
typedef struct DecoderOut {
RK_U8 *data;
RK_U32 size;
RK_S64 timeUs;
RK_S32 nFlags;
} DecoderOut_t;
typedef struct ParserOut {
RK_U8 *data;
RK_U32 size;
RK_S64 timeUs;
RK_U32 nFlags;
RK_U32 width;
RK_U32 height;
} ParserOut_t;
typedef struct EncInputStream {
RK_U8 *buf;
RK_S32 size;
RK_U32 bufPhyAddr;
RK_S64 timeUs;
RK_U32 nFlags;
} EncInputStream_t;
typedef struct EncoderOut {
RK_U8 *data;
RK_S32 size;
RK_S64 timeUs;
RK_S32 keyFrame;
} EncoderOut_t;
/*
* @brief Enumeration used to define the possible video compression codings.
* @note This essentially refers to file extensions. If the coding is
* being used to specify the ENCODE type, then additional work
* must be done to configure the exact flavor of the compression
 *        to be used. For decode cases where the user application
 *        cannot differentiate between MPEG-4 and H.264 bit streams, it is
 *        up to the codec to handle this.
*
* sync with the omx_video.h
*/
typedef enum OMX_RK_VIDEO_CODINGTYPE {
OMX_RK_VIDEO_CodingUnused, /**< Value when coding is N/A */
OMX_RK_VIDEO_CodingAutoDetect, /**< Autodetection of coding type */
OMX_RK_VIDEO_CodingMPEG2, /**< AKA: H.262 */
OMX_RK_VIDEO_CodingH263, /**< H.263 */
OMX_RK_VIDEO_CodingMPEG4, /**< MPEG-4 */
OMX_RK_VIDEO_CodingWMV, /**< Windows Media Video (WMV1,WMV2,WMV3)*/
OMX_RK_VIDEO_CodingRV, /**< all versions of Real Video */
OMX_RK_VIDEO_CodingAVC, /**< H.264/AVC */
OMX_RK_VIDEO_CodingMJPEG, /**< Motion JPEG */
OMX_RK_VIDEO_CodingVP8, /**< VP8 */
OMX_RK_VIDEO_CodingVP9, /**< VP9 */
OMX_RK_VIDEO_CodingVC1 = 0x01000000, /**< Windows Media Video (WMV1,WMV2,WMV3)*/
OMX_RK_VIDEO_CodingFLV1, /**< Sorenson H.263 */
OMX_RK_VIDEO_CodingDIVX3, /**< DIVX3 */
OMX_RK_VIDEO_CodingVP6,
OMX_RK_VIDEO_CodingHEVC, /**< H.265/HEVC */
OMX_RK_VIDEO_CodingAVSPLUS, /**< AVS+ profile 0x48 */
OMX_RK_VIDEO_CodingAVS, /**< AVS profile 0x20 */
OMX_RK_VIDEO_CodingAVS2, /**< AVS2 */
OMX_RK_VIDEO_CodingAV1, /**< av1 */
OMX_RK_VIDEO_CodingKhronosExtensions = 0x6F000000, /**< Reserved region for introducing Khronos Standard Extensions */
OMX_RK_VIDEO_CodingVendorStartUnused = 0x7F000000, /**< Reserved region for introducing Vendor Extensions */
OMX_RK_VIDEO_CodingMax = 0x7FFFFFFF
} OMX_RK_VIDEO_CODINGTYPE;
typedef enum CODEC_TYPE {
CODEC_NONE,
CODEC_DECODER,
CODEC_ENCODER,
CODEC_BUTT,
} CODEC_TYPE;
typedef enum VPU_API_ERR {
VPU_API_OK = 0,
VPU_API_ERR_UNKNOW = -1,
VPU_API_ERR_BASE = -1000,
VPU_API_ERR_LIST_STREAM = VPU_API_ERR_BASE - 1,
VPU_API_ERR_INIT = VPU_API_ERR_BASE - 2,
VPU_API_ERR_VPU_CODEC_INIT = VPU_API_ERR_BASE - 3,
VPU_API_ERR_STREAM = VPU_API_ERR_BASE - 4,
VPU_API_ERR_FATAL_THREAD = VPU_API_ERR_BASE - 5,
VPU_API_EOS_STREAM_REACHED = VPU_API_ERR_BASE - 11,
VPU_API_ERR_BUTT,
} VPU_API_ERR;
typedef enum VPU_FRAME_ERR {
VPU_FRAME_ERR_UNKNOW = 0x0001,
VPU_FRAME_ERR_UNSUPPORT = 0x0002,
} VPU_FRAME_ERR;
typedef struct EncParameter {
RK_S32 width;
RK_S32 height;
RK_S32 rc_mode; /* 0 - CQP mode; 1 - CBR mode; 2 - FIXQP mode*/
RK_S32 bitRate; /* target bitrate */
RK_S32 framerate;
RK_S32 qp;
RK_S32 enableCabac;
RK_S32 cabacInitIdc;
RK_S32 format;
RK_S32 intraPicRate;
RK_S32 framerateout;
RK_S32 profileIdc;
RK_S32 levelIdc;
RK_S32 reserved[3];
} EncParameter_t;
typedef struct EXtraCfg {
RK_S32 vc1extra_size;
RK_S32 vp6codeid;
RK_S32 tsformat;
RK_U32 ori_vpu; /* use the original vpu framework */
/* below used in decode */
RK_U32 mpp_mode; /* use mpp framework */
RK_U32 bit_depth; /* 8 or 10 bit */
RK_U32 yuv_format; /* 0:420 1:422 2:444 */
RK_U32 reserved[16];
} EXtraCfg_t;
/**
* @brief vpu function interface
*/
typedef struct VpuCodecContext {
void* vpuApiObj;
CODEC_TYPE codecType;
OMX_RK_VIDEO_CODINGTYPE videoCoding;
RK_U32 width;
RK_U32 height;
void *extradata;
RK_S32 extradata_size;
RK_U8 enableparsing;
RK_S32 no_thread;
EXtraCfg_t extra_cfg;
void* private_data;
/*
** 1: error state(not working) 0: working
*/
RK_S32 decoder_err;
/**
     * Allocate and initialize a VpuCodecContext.
     *
     * @param ctx The context of vpu api, allocated in this function.
     * @param extraData The extra data of the codec; some codecs need / can
     *        use extradata such as Huffman tables, and codecs like VC1 can
     *        use extradata to initialize themselves.
     * @param extra_size The size of the extra data.
     *
     * @return 0 for init success, others for failure.
     * @note check whether ctx has been allocated successfully after init.
*/
RK_S32 (*init)(struct VpuCodecContext *ctx, RK_U8 *extraData, RK_U32 extra_size);
/**
* @brief both send video stream packet to decoder and get video frame from
* decoder at the same time
* @param ctx The context of vpu codec
* @param pkt[in] Stream to be decoded
* @param aDecOut[out] Decoding frame
* @return 0 for decode success, others for failure.
*/
RK_S32 (*decode)(struct VpuCodecContext *ctx, VideoPacket_t *pkt, DecoderOut_t *aDecOut);
/**
* @brief both send video frame to encoder and get encoded video stream from
* encoder at the same time.
* @param ctx The context of vpu codec
* @param aEncInStrm[in] Frame to be encoded
* @param aEncOut[out] Encoding stream
* @return 0 for encode success, others for failure.
*/
RK_S32 (*encode)(struct VpuCodecContext *ctx, EncInputStream_t *aEncInStrm, EncoderOut_t *aEncOut);
/**
     * @brief flush the codec while doing fast-forward playback.
* @param ctx The context of vpu codec
* @return 0 for flush success, others for failure.
*/
RK_S32 (*flush)(struct VpuCodecContext *ctx);
RK_S32 (*control)(struct VpuCodecContext *ctx, VPU_API_CMD cmdType, void* param);
/**
* @brief send video stream packet to decoder only, async interface
* @param ctx The context of vpu codec
* @param pkt Stream to be decoded
* @return 0 for success, others for failure.
*/
RK_S32 (*decode_sendstream)(struct VpuCodecContext *ctx, VideoPacket_t *pkt);
/**
* @brief get video frame from decoder only, async interface
* @param ctx The context of vpu codec
* @param aDecOut Decoding frame
* @return 0 for success, others for failure.
*/
RK_S32 (*decode_getframe)(struct VpuCodecContext *ctx, DecoderOut_t *aDecOut);
/**
* @brief send video frame to encoder only, async interface
* @param ctx The context of vpu codec
* @param aEncInStrm Frame to be encoded
* @return 0 for success, others for failure.
*/
RK_S32 (*encoder_sendframe)(struct VpuCodecContext *ctx, EncInputStream_t *aEncInStrm);
/**
* @brief get encoded video packet from encoder only, async interface
* @param ctx The context of vpu codec
* @param aEncOut Encoding stream
* @return 0 for success, others for failure.
*/
RK_S32 (*encoder_getstream)(struct VpuCodecContext *ctx, EncoderOut_t *aEncOut);
} VpuCodecContext_t;
/* allocated vpu codec context */
#ifdef __cplusplus
extern "C"
{
#endif
/**
* @brief open context of vpu
* @param ctx pointer of vpu codec context
*/
RK_S32 vpu_open_context(struct VpuCodecContext **ctx);
/**
* @brief close context of vpu
* @param ctx pointer of vpu codec context
*/
RK_S32 vpu_close_context(struct VpuCodecContext **ctx);
#ifdef __cplusplus
}
#endif
/*
* vpu_mem api
*/
#define vpu_display_mem_pool_FIELDS \
RK_S32 (*commit_hdl)(vpu_display_mem_pool *p, RK_S32 hdl, RK_S32 size); \
void* (*get_free)(vpu_display_mem_pool *p); \
RK_S32 (*inc_used)(vpu_display_mem_pool *p, void *hdl); \
RK_S32 (*put_used)(vpu_display_mem_pool *p, void *hdl); \
RK_S32 (*reset)(vpu_display_mem_pool *p); \
RK_S32 (*get_unused_num)(vpu_display_mem_pool *p); \
RK_S32 buff_size;\
float version; \
RK_S32 res[18];
typedef struct vpu_display_mem_pool vpu_display_mem_pool;
struct vpu_display_mem_pool {
vpu_display_mem_pool_FIELDS
};
#ifdef __cplusplus
extern "C"
{
#endif
/*
* vpu memory handle interface
*/
RK_S32 VPUMemJudgeIommu(void);
RK_S32 VPUMallocLinear(VPUMemLinear_t *p, RK_U32 size);
RK_S32 VPUFreeLinear(VPUMemLinear_t *p);
RK_S32 VPUMemDuplicate(VPUMemLinear_t *dst, VPUMemLinear_t *src);
RK_S32 VPUMemLink(VPUMemLinear_t *p);
RK_S32 VPUMemFlush(VPUMemLinear_t *p);
RK_S32 VPUMemClean(VPUMemLinear_t *p);
RK_S32 VPUMemInvalidate(VPUMemLinear_t *p);
RK_S32 VPUMemGetFD(VPUMemLinear_t *p);
RK_S32 VPUMallocLinearFromRender(VPUMemLinear_t *p, RK_U32 size, void *ctx);
/*
* vpu memory allocator and manager interface
*/
vpu_display_mem_pool* open_vpu_memory_pool(void);
void close_vpu_memory_pool(vpu_display_mem_pool *p);
int create_vpu_memory_pool_allocator(vpu_display_mem_pool **ipool, int num, int size);
void release_vpu_memory_pool_allocator(vpu_display_mem_pool *ipool);
#ifdef __cplusplus
}
#endif
#endif /*__VPU_API_H__*/
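To close the loop on the context interface above, a hedged end-to-end sketch (error handling trimmed; the H.264 coding type and caller-supplied stream buffer are assumptions):

#include "vpu_api.h"

static RK_S32 demo_decode(RK_U8 *stream, RK_S32 size)
{
    VpuCodecContext_t *ctx = NULL;
    VideoPacket_t pkt = { .data = stream, .size = size };
    DecoderOut_t  out = { 0 };
    RK_S32 ret;

    if (vpu_open_context(&ctx) || !ctx)
        return VPU_API_ERR_INIT;

    ctx->codecType   = CODEC_DECODER;
    ctx->videoCoding = OMX_RK_VIDEO_CodingAVC;  /* assumed: H.264 stream */

    ret = ctx->init(ctx, NULL, 0);              /* no extradata in this sketch */
    if (!ret)
        ret = ctx->decode(ctx, &pkt, &out);     /* send one packet, get one frame */

    vpu_close_context(&ctx);
    return ret;
}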

View File

@ -0,0 +1,20 @@
#!/bin/bash
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
[[ ! "${OPENCV_QUIET}" ]] && ( echo "Setting vars for OpenCV 3.4.5" )
export LD_LIBRARY_PATH="$SCRIPT_DIR/../lib:$LD_LIBRARY_PATH"
if [[ ! "$OPENCV_SKIP_PYTHON" ]]; then
PYTHONPATH_OPENCV="$SCRIPT_DIR/python_loader_is_not_installed"
[[ ! "${OPENCV_QUIET}" ]] && ( echo "Append PYTHONPATH: ${PYTHONPATH_OPENCV}" )
export PYTHONPATH="${PYTHONPATH_OPENCV}:$PYTHONPATH"
fi
# Don't exec in "sourced" mode
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
if [[ $# -ne 0 ]]; then
[[ ! "${OPENCV_QUIET}" && "${OPENCV_VERBOSE}" ]] && ( echo "Executing: $*" )
exec "$@"
fi
fi

View File

@ -0,0 +1,73 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CV_H
#define OPENCV_OLD_CV_H
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#define __CVSTR2__(x) #x
#define __CVSTR1__(x) __CVSTR2__(x)
#define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : "
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg))
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg))
#else
#define CV_DO_PRAGMA(x)
#define CV_MSG_PRAGMA(_msg)
#endif
#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x)
//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module")
#include "opencv2/core/core_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/photo/photo_c.h"
#include "opencv2/video/tracking_c.h"
#include "opencv2/objdetect/objdetect_c.h"
#if !defined(CV_IMPL)
#define CV_IMPL extern "C"
#endif //CV_IMPL
#endif // OPENCV_OLD_CV_H

View File

@ -0,0 +1,60 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CV_HPP
#define OPENCV_OLD_CV_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cv.h"
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/video.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#endif

View File

@ -0,0 +1,57 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_AUX_H
#define OPENCV_OLD_AUX_H
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "opencv2/core/core_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/photo/photo_c.h"
#include "opencv2/video/tracking_c.h"
#include "opencv2/objdetect/objdetect_c.h"
#endif
/* End of file. */

View File

@ -0,0 +1,52 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_AUX_HPP
#define OPENCV_OLD_AUX_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cvaux.h"
#include "opencv2/core/utility.hpp"
#endif

View File

@ -0,0 +1,46 @@
///////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to
// this license. If you do not agree to this license, do not download,
// install, copy or use the software.
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2008, Google, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation or contributors may not be used to endorse
// or promote products derived from this software without specific
// prior written permission.
//
// This software is provided by the copyright holders and contributors "as is"
// and any express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular purpose
// are disclaimed. In no event shall the Intel Corporation or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifndef OPENCV_OLD_WIMAGE_HPP
#define OPENCV_OLD_WIMAGE_HPP
#include "opencv2/core/wimage.hpp"
#endif

View File

@ -0,0 +1,52 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CXCORE_H
#define OPENCV_OLD_CXCORE_H
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "opencv2/core/core_c.h"
#endif

View File

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CXCORE_HPP
#define OPENCV_OLD_CXCORE_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cxcore.h"
#include "opencv2/core.hpp"
#endif

View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_EIGEN_HPP
#define OPENCV_OLD_EIGEN_HPP
#include "opencv2/core/eigen.hpp"
#endif

View File

@ -0,0 +1,8 @@
#ifndef OPENCV_OLD_CXMISC_H
#define OPENCV_OLD_CXMISC_H
#ifdef __cplusplus
# include "opencv2/core/utility.hpp"
#endif
#endif

View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_HIGHGUI_H
#define OPENCV_OLD_HIGHGUI_H
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#endif

View File

@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_ML_H
#define OPENCV_OLD_ML_H
#include "opencv2/core/core_c.h"
#include "opencv2/ml.hpp"
#endif

View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/calib3d.hpp"

View File

@ -0,0 +1,427 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CALIB3D_C_H
#define OPENCV_CALIB3D_C_H
#include "opencv2/core/core_c.h"
#ifdef __cplusplus
extern "C" {
#endif
/** @addtogroup calib3d_c
@{
*/
/****************************************************************************************\
* Camera Calibration, Pose Estimation and Stereo *
\****************************************************************************************/
typedef struct CvPOSITObject CvPOSITObject;
/* Allocates and initializes CvPOSITObject structure before doing cvPOSIT */
CVAPI(CvPOSITObject*) cvCreatePOSITObject( CvPoint3D32f* points, int point_count );
/* Runs POSIT (POSe from ITeration) algorithm for determining 3d position of
an object given its model and projection in a weak-perspective case */
CVAPI(void) cvPOSIT( CvPOSITObject* posit_object, CvPoint2D32f* image_points,
double focal_length, CvTermCriteria criteria,
float* rotation_matrix, float* translation_vector);
/* Releases CvPOSITObject structure */
CVAPI(void) cvReleasePOSITObject( CvPOSITObject** posit_object );
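/* Usage sketch (illustrative, with hypothetical variable names): a minimal POSIT
   round trip, assuming `modelPoints` holds N known 3D model points and
   `imagePoints` their observed 2D projections:

       CvPOSITObject* posit = cvCreatePOSITObject(modelPoints, N);
       float rot[9], trans[3];
       CvTermCriteria crit = cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 30, 1e-5);
       cvPOSIT(posit, imagePoints, focalLength, crit, rot, trans);
       cvReleasePOSITObject(&posit);
*/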
/* Updates the number of RANSAC iterations */
CVAPI(int) cvRANSACUpdateNumIters( double p, double err_prob,
int model_points, int max_iters );
CVAPI(void) cvConvertPointsHomogeneous( const CvMat* src, CvMat* dst );
/* Calculates fundamental matrix given a set of corresponding points */
#define CV_FM_7POINT 1
#define CV_FM_8POINT 2
#define CV_LMEDS 4
#define CV_RANSAC 8
#define CV_FM_LMEDS_ONLY CV_LMEDS
#define CV_FM_RANSAC_ONLY CV_RANSAC
#define CV_FM_LMEDS CV_LMEDS
#define CV_FM_RANSAC CV_RANSAC
enum
{
CV_ITERATIVE = 0,
CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
};
CVAPI(int) cvFindFundamentalMat( const CvMat* points1, const CvMat* points2,
CvMat* fundamental_matrix,
int method CV_DEFAULT(CV_FM_RANSAC),
double param1 CV_DEFAULT(3.), double param2 CV_DEFAULT(0.99),
CvMat* status CV_DEFAULT(NULL) );
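/* Usage sketch (illustrative, with hypothetical variable names): estimating F with
   RANSAC from N point correspondences stored in `points1`/`points2` (e.g. 2xN CvMat):

       CvMat* F = cvCreateMat(3, 3, CV_64F);
       CvMat* inliers = cvCreateMat(1, N, CV_8U);
       int found = cvFindFundamentalMat(points1, points2, F, CV_FM_RANSAC, 3., 0.99, inliers);
*/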
/* For each input point on one of the images,
   computes the parameters of the corresponding
   epipolar line on the other image */
CVAPI(void) cvComputeCorrespondEpilines( const CvMat* points,
int which_image,
const CvMat* fundamental_matrix,
CvMat* correspondent_lines );
/* Triangulation functions */
CVAPI(void) cvTriangulatePoints(CvMat* projMatr1, CvMat* projMatr2,
CvMat* projPoints1, CvMat* projPoints2,
CvMat* points4D);
CVAPI(void) cvCorrectMatches(CvMat* F, CvMat* points1, CvMat* points2,
CvMat* new_points1, CvMat* new_points2);
/* Computes the optimal new camera matrix according to the free scaling parameter alpha:
alpha=0 - only valid pixels will be retained in the undistorted image
alpha=1 - all the source image pixels will be retained in the undistorted image
*/
CVAPI(void) cvGetOptimalNewCameraMatrix( const CvMat* camera_matrix,
const CvMat* dist_coeffs,
CvSize image_size, double alpha,
CvMat* new_camera_matrix,
CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
CvRect* valid_pixel_ROI CV_DEFAULT(0),
int center_principal_point CV_DEFAULT(0));
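/* Usage sketch (illustrative, with hypothetical variable names): keeping all source
   pixels (alpha = 1) given an intrinsic matrix `K` and distortion vector `dist`:

       CvMat* newK = cvCreateMat(3, 3, CV_64F);
       cvGetOptimalNewCameraMatrix(K, dist, imageSize, 1.0, newK,
                                   cvSize(0,0), NULL, 0);
*/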
/* Converts rotation vector to rotation matrix or vice versa */
CVAPI(int) cvRodrigues2( const CvMat* src, CvMat* dst,
CvMat* jacobian CV_DEFAULT(0) );
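/* Usage sketch (illustrative): converting a 3x1 rotation vector to a 3x3 rotation matrix:

       double rdata[3] = {0.1, 0.2, 0.3}, Rdata[9];
       CvMat rvec = cvMat(3, 1, CV_64F, rdata);
       CvMat R    = cvMat(3, 3, CV_64F, Rdata);
       cvRodrigues2(&rvec, &R, NULL);
*/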
/* Finds perspective transformation between the object plane and image (view) plane */
CVAPI(int) cvFindHomography( const CvMat* src_points,
const CvMat* dst_points,
CvMat* homography,
int method CV_DEFAULT(0),
double ransacReprojThreshold CV_DEFAULT(3),
CvMat* mask CV_DEFAULT(0),
int maxIters CV_DEFAULT(2000),
double confidence CV_DEFAULT(0.995));
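/* Usage sketch (illustrative, with hypothetical variable names): robustly fitting a
   homography to point correspondences stored in `srcPts`/`dstPts`:

       CvMat* H = cvCreateMat(3, 3, CV_64F);
       cvFindHomography(srcPts, dstPts, H, CV_RANSAC, 3., NULL, 2000, 0.995);
*/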
/* Computes RQ decomposition for 3x3 matrices */
CVAPI(void) cvRQDecomp3x3( const CvMat *matrixM, CvMat *matrixR, CvMat *matrixQ,
CvMat *matrixQx CV_DEFAULT(NULL),
CvMat *matrixQy CV_DEFAULT(NULL),
CvMat *matrixQz CV_DEFAULT(NULL),
CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
/* Computes projection matrix decomposition */
CVAPI(void) cvDecomposeProjectionMatrix( const CvMat *projMatr, CvMat *calibMatr,
CvMat *rotMatr, CvMat *posVect,
CvMat *rotMatrX CV_DEFAULT(NULL),
CvMat *rotMatrY CV_DEFAULT(NULL),
CvMat *rotMatrZ CV_DEFAULT(NULL),
CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
/* Computes d(AB)/dA and d(AB)/dB */
CVAPI(void) cvCalcMatMulDeriv( const CvMat* A, const CvMat* B, CvMat* dABdA, CvMat* dABdB );
/* Computes r3 = rodrigues(rodrigues(r2)*rodrigues(r1)),
t3 = rodrigues(r2)*t1 + t2 and the respective derivatives */
CVAPI(void) cvComposeRT( const CvMat* _rvec1, const CvMat* _tvec1,
const CvMat* _rvec2, const CvMat* _tvec2,
CvMat* _rvec3, CvMat* _tvec3,
CvMat* dr3dr1 CV_DEFAULT(0), CvMat* dr3dt1 CV_DEFAULT(0),
CvMat* dr3dr2 CV_DEFAULT(0), CvMat* dr3dt2 CV_DEFAULT(0),
CvMat* dt3dr1 CV_DEFAULT(0), CvMat* dt3dt1 CV_DEFAULT(0),
CvMat* dt3dr2 CV_DEFAULT(0), CvMat* dt3dt2 CV_DEFAULT(0) );
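/* Usage sketch (illustrative, with hypothetical variable names): chaining pose
   (rvec1, tvec1) followed by (rvec2, tvec2) into (rvec3, tvec3), ignoring the
   optional derivative outputs:

       cvComposeRT(rvec1, tvec1, rvec2, tvec2, rvec3, tvec3,
                   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
*/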
/* Projects object points to the view plane using
the specified extrinsic and intrinsic camera parameters */
CVAPI(void) cvProjectPoints2( const CvMat* object_points, const CvMat* rotation_vector,
const CvMat* translation_vector, const CvMat* camera_matrix,
const CvMat* distortion_coeffs, CvMat* image_points,
CvMat* dpdrot CV_DEFAULT(NULL), CvMat* dpdt CV_DEFAULT(NULL),
CvMat* dpdf CV_DEFAULT(NULL), CvMat* dpdc CV_DEFAULT(NULL),
CvMat* dpddist CV_DEFAULT(NULL),
double aspect_ratio CV_DEFAULT(0));
/* Finds extrinsic camera parameters from
a few known corresponding point pairs and intrinsic parameters */
CVAPI(void) cvFindExtrinsicCameraParams2( const CvMat* object_points,
const CvMat* image_points,
const CvMat* camera_matrix,
const CvMat* distortion_coeffs,
CvMat* rotation_vector,
CvMat* translation_vector,
int use_extrinsic_guess CV_DEFAULT(0) );
/* Computes initial estimate of the intrinsic camera parameters
in case of planar calibration target (e.g. chessboard) */
CVAPI(void) cvInitIntrinsicParams2D( const CvMat* object_points,
const CvMat* image_points,
const CvMat* npoints, CvSize image_size,
CvMat* camera_matrix,
double aspect_ratio CV_DEFAULT(1.) );
#define CV_CALIB_CB_ADAPTIVE_THRESH 1
#define CV_CALIB_CB_NORMALIZE_IMAGE 2
#define CV_CALIB_CB_FILTER_QUADS 4
#define CV_CALIB_CB_FAST_CHECK 8
// Performs a fast check whether a chessboard is in the input image. This is a workaround for
// cvFindChessboardCorners being slow on images that contain no chessboard.
// - src: input image
// - size: chessboard size
// Returns 1 if a chessboard may be in this image and cvFindChessboardCorners should be called,
// 0 if there is no chessboard, -1 in case of error
CVAPI(int) cvCheckChessboard(IplImage* src, CvSize size);
/* Detects corners on a chessboard calibration pattern */
CVAPI(int) cvFindChessboardCorners( const void* image, CvSize pattern_size,
CvPoint2D32f* corners,
int* corner_count CV_DEFAULT(NULL),
int flags CV_DEFAULT(CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE) );
/* Draws individual chessboard corners or the whole chessboard detected */
CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size,
CvPoint2D32f* corners,
int count, int pattern_was_found );
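/* Usage sketch (illustrative, with hypothetical variable names): the typical
   detect-and-draw chessboard flow for a board with 9x6 inner corners, assuming
   `img` is an 8-bit IplImage*:

       CvSize sz = cvSize(9, 6);
       CvPoint2D32f corners[9 * 6];
       int count = 0;
       if (cvCheckChessboard(img, sz) > 0 &&
           cvFindChessboardCorners(img, sz, corners, &count,
                                   CV_CALIB_CB_ADAPTIVE_THRESH + CV_CALIB_CB_NORMALIZE_IMAGE))
           cvDrawChessboardCorners(img, sz, corners, count, 1);
*/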
#define CV_CALIB_USE_INTRINSIC_GUESS 1
#define CV_CALIB_FIX_ASPECT_RATIO 2
#define CV_CALIB_FIX_PRINCIPAL_POINT 4
#define CV_CALIB_ZERO_TANGENT_DIST 8
#define CV_CALIB_FIX_FOCAL_LENGTH 16
#define CV_CALIB_FIX_K1 32
#define CV_CALIB_FIX_K2 64
#define CV_CALIB_FIX_K3 128
#define CV_CALIB_FIX_K4 2048
#define CV_CALIB_FIX_K5 4096
#define CV_CALIB_FIX_K6 8192
#define CV_CALIB_RATIONAL_MODEL 16384
#define CV_CALIB_THIN_PRISM_MODEL 32768
#define CV_CALIB_FIX_S1_S2_S3_S4 65536
#define CV_CALIB_TILTED_MODEL 262144
#define CV_CALIB_FIX_TAUX_TAUY 524288
#define CV_CALIB_FIX_TANGENT_DIST 2097152
#define CV_CALIB_NINTRINSIC 18
/* Finds intrinsic and extrinsic camera parameters
from a few views of known calibration pattern */
CVAPI(double) cvCalibrateCamera2( const CvMat* object_points,
const CvMat* image_points,
const CvMat* point_counts,
CvSize image_size,
CvMat* camera_matrix,
CvMat* distortion_coeffs,
CvMat* rotation_vectors CV_DEFAULT(NULL),
CvMat* translation_vectors CV_DEFAULT(NULL),
int flags CV_DEFAULT(0),
CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,DBL_EPSILON)) );
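/* Usage sketch (illustrative, with hypothetical variable names): calibrating from M
   views, where `objPts`/`imgPts` stack all detected points across views and `counts`
   is a 1xM matrix holding the number of points per view:

       CvMat* K    = cvCreateMat(3, 3, CV_64F);
       CvMat* dist = cvCreateMat(1, 5, CV_64F);
       double rms = cvCalibrateCamera2(objPts, imgPts, counts, imageSize, K, dist,
                                       NULL, NULL, 0,
                                       cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS,
                                                      30, DBL_EPSILON));
*/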
/* Computes various useful characteristics of the camera from the data computed by
cvCalibrateCamera2 */
CVAPI(void) cvCalibrationMatrixValues( const CvMat *camera_matrix,
CvSize image_size,
double aperture_width CV_DEFAULT(0),
double aperture_height CV_DEFAULT(0),
double *fovx CV_DEFAULT(NULL),
double *fovy CV_DEFAULT(NULL),
double *focal_length CV_DEFAULT(NULL),
CvPoint2D64f *principal_point CV_DEFAULT(NULL),
double *pixel_aspect_ratio CV_DEFAULT(NULL));
#define CV_CALIB_FIX_INTRINSIC 256
#define CV_CALIB_SAME_FOCAL_LENGTH 512
/* Computes the transformation from one camera coordinate system to another one
from a few corresponding views of the same calibration target. Optionally, calibrates
both cameras */
CVAPI(double) cvStereoCalibrate( const CvMat* object_points, const CvMat* image_points1,
const CvMat* image_points2, const CvMat* npoints,
CvMat* camera_matrix1, CvMat* dist_coeffs1,
CvMat* camera_matrix2, CvMat* dist_coeffs2,
CvSize image_size, CvMat* R, CvMat* T,
CvMat* E CV_DEFAULT(0), CvMat* F CV_DEFAULT(0),
int flags CV_DEFAULT(CV_CALIB_FIX_INTRINSIC),
CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,1e-6)) );
#define CV_CALIB_ZERO_DISPARITY 1024
/* Computes 3D rotations (+ optional shift) for each camera coordinate system to make both
views parallel (=> to make all the epipolar lines horizontal or vertical) */
CVAPI(void) cvStereoRectify( const CvMat* camera_matrix1, const CvMat* camera_matrix2,
const CvMat* dist_coeffs1, const CvMat* dist_coeffs2,
CvSize image_size, const CvMat* R, const CvMat* T,
CvMat* R1, CvMat* R2, CvMat* P1, CvMat* P2,
CvMat* Q CV_DEFAULT(0),
int flags CV_DEFAULT(CV_CALIB_ZERO_DISPARITY),
double alpha CV_DEFAULT(-1),
CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
CvRect* valid_pix_ROI1 CV_DEFAULT(0),
CvRect* valid_pix_ROI2 CV_DEFAULT(0));
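/* Usage sketch (illustrative, with hypothetical variable names): stereo calibration
   followed by rectification, assuming each camera was calibrated individually first
   (K1/d1, K2/d2), so only the relative pose R, T is estimated:

       CvMat* R = cvCreateMat(3, 3, CV_64F);
       CvMat* T = cvCreateMat(3, 1, CV_64F);
       cvStereoCalibrate(objPts, imgPts1, imgPts2, counts, K1, d1, K2, d2,
                         imageSize, R, T, NULL, NULL, CV_CALIB_FIX_INTRINSIC,
                         cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 30, 1e-6));
       CvMat *R1 = cvCreateMat(3, 3, CV_64F), *R2 = cvCreateMat(3, 3, CV_64F);
       CvMat *P1 = cvCreateMat(3, 4, CV_64F), *P2 = cvCreateMat(3, 4, CV_64F);
       cvStereoRectify(K1, K2, d1, d2, imageSize, R, T, R1, R2, P1, P2,
                       NULL, CV_CALIB_ZERO_DISPARITY, -1, cvSize(0,0), NULL, NULL);
*/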
/* Computes rectification transformations for uncalibrated pair of images using a set
of point correspondences */
CVAPI(int) cvStereoRectifyUncalibrated( const CvMat* points1, const CvMat* points2,
const CvMat* F, CvSize img_size,
CvMat* H1, CvMat* H2,
double threshold CV_DEFAULT(5));
/* stereo correspondence parameters and functions */
#define CV_STEREO_BM_NORMALIZED_RESPONSE 0
#define CV_STEREO_BM_XSOBEL 1
/* Block matching algorithm structure */
typedef struct CvStereoBMState
{
// pre-filtering (normalization of input images)
int preFilterType; // =CV_STEREO_BM_NORMALIZED_RESPONSE now
int preFilterSize; // averaging window size: ~5x5..21x21
int preFilterCap; // the output of pre-filtering is clipped by [-preFilterCap,preFilterCap]
// correspondence using Sum of Absolute Difference (SAD)
int SADWindowSize; // ~5x5..21x21
int minDisparity; // minimum disparity (can be negative)
int numberOfDisparities; // maximum disparity - minimum disparity (> 0)
// post-filtering
int textureThreshold; // the disparity is only computed for pixels
// with a sufficiently textured neighborhood
int uniquenessRatio; // accept the computed disparity d* only if
// SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.)
// for any d != d*+/-1 within the search range.
int speckleWindowSize; // disparity variation window
int speckleRange; // acceptable range of variation in window
int trySmallerWindows; // if 1, the results may be more accurate,
// at the expense of slower processing
CvRect roi1, roi2;
int disp12MaxDiff;
// temporary buffers
CvMat* preFilteredImg0;
CvMat* preFilteredImg1;
CvMat* slidingSumBuf;
CvMat* cost;
CvMat* disp;
} CvStereoBMState;
#define CV_STEREO_BM_BASIC 0
#define CV_STEREO_BM_FISH_EYE 1
#define CV_STEREO_BM_NARROW 2
CVAPI(CvStereoBMState*) cvCreateStereoBMState(int preset CV_DEFAULT(CV_STEREO_BM_BASIC),
int numberOfDisparities CV_DEFAULT(0));
CVAPI(void) cvReleaseStereoBMState( CvStereoBMState** state );
CVAPI(void) cvFindStereoCorrespondenceBM( const CvArr* left, const CvArr* right,
CvArr* disparity, CvStereoBMState* state );
CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
int numberOfDisparities, int SADWindowSize );
CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost,
int minDisparity, int numberOfDisparities,
int disp12MaxDiff CV_DEFAULT(1) );
/* Reprojects the computed disparity image to the 3D space using the specified 4x4 matrix */
CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage,
CvArr* _3dImage, const CvMat* Q,
int handleMissingValues CV_DEFAULT(0) );
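/* Usage sketch (illustrative, with hypothetical variable names): block-matching
   disparity on a rectified 8-bit pair, then reprojection to 3D with the Q matrix
   produced by cvStereoRectify:

       CvStereoBMState* bm = cvCreateStereoBMState(CV_STEREO_BM_BASIC, 64);
       cvFindStereoCorrespondenceBM(leftRect, rightRect, disparity, bm);
       cvReprojectImageTo3D(disparity, image3d, Q, 0);
       cvReleaseStereoBMState(&bm);
*/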
/** @} calib3d_c */
#ifdef __cplusplus
} // extern "C"
//////////////////////////////////////////////////////////////////////////////////////////
class CV_EXPORTS CvLevMarq
{
public:
CvLevMarq();
CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
bool completeSymmFlag=false );
~CvLevMarq();
void init( int nparams, int nerrs, CvTermCriteria criteria=
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
bool completeSymmFlag=false );
bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
void clear();
void step();
enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
cv::Ptr<CvMat> mask;
cv::Ptr<CvMat> prevParam;
cv::Ptr<CvMat> param;
cv::Ptr<CvMat> J;
cv::Ptr<CvMat> err;
cv::Ptr<CvMat> JtJ;
cv::Ptr<CvMat> JtJN;
cv::Ptr<CvMat> JtErr;
cv::Ptr<CvMat> JtJV;
cv::Ptr<CvMat> JtJW;
double prevErrNorm, errNorm;
int lambdaLg10;
CvTermCriteria criteria;
int state;
int iters;
bool completeSymmFlag;
int solveMethod;
};
#endif
#endif /* OPENCV_CALIB3D_C_H */

View File

@ -0,0 +1,678 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_AFFINE3_HPP
#define OPENCV_CORE_AFFINE3_HPP
#ifdef __cplusplus
#include <opencv2/core.hpp>
namespace cv
{
//! @addtogroup core
//! @{
/** @brief Affine transform
*
* It represents a 4x4 homogeneous transformation matrix \f$T\f$
*
* \f[T =
* \begin{bmatrix}
* R & t\\
* 0 & 1\\
* \end{bmatrix}
* \f]
*
* where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
*
* You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
* which is converted to a 3x3 rotation matrix by the Rodrigues formula.
*
* To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
* angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use
*
* @code
* cv::Vec3f r, t;
* cv::Affine3f T(r, t);
* @endcode
*
* If you already have the rotation matrix \f$R\f$, then you can use
*
* @code
* cv::Matx33f R;
* cv::Affine3f T(R, t);
* @endcode
*
* To extract the rotation matrix \f$R\f$ from \f$T\f$, use
*
* @code
* cv::Matx33f R = T.rotation();
* @endcode
*
* To extract the translation vector \f$t\f$ from \f$T\f$, use
*
* @code
* cv::Vec3f t = T.translation();
* @endcode
*
* To extract the rotation vector \f$r\f$ from \f$T\f$, use
*
* @code
* cv::Vec3f r = T.rvec();
* @endcode
*
 * Note that since the mapping from rotation vectors to rotation matrices
 * is many-to-one, the returned rotation vector is not necessarily the one
 * you used before to set the matrix.
*
* If you have two transformations \f$T = T_1 * T_2\f$, use
*
* @code
* cv::Affine3f T, T1, T2;
* T = T2.concatenate(T1);
* @endcode
*
* To get the inverse transform of \f$T\f$, use
*
* @code
* cv::Affine3f T, T_inv;
* T_inv = T.inv();
* @endcode
*
*/
template<typename T>
class Affine3
{
public:
typedef T float_type;
typedef Matx<float_type, 3, 3> Mat3;
typedef Matx<float_type, 4, 4> Mat4;
typedef Vec<float_type, 3> Vec3;
//! Default constructor. It represents a 4x4 identity matrix.
Affine3();
//! Augmented affine matrix
Affine3(const Mat4& affine);
/**
* The resulting 4x4 matrix is
*
* \f[
* \begin{bmatrix}
* R & t\\
* 0 & 1\\
* \end{bmatrix}
* \f]
*
* @param R 3x3 rotation matrix.
* @param t 3x1 translation vector.
*/
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
/**
* Rodrigues vector.
*
* The last row of the current matrix is set to [0,0,0,1].
*
* @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
* indicates the rotation angle in radian (using right hand rule).
* @param t 3x1 translation vector.
*/
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
/**
* Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
*
* The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
*
* @param data 1-channel matrix.
* when it is 4x4, it is copied to the current matrix and t is not used.
* When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
* When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
* When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
* to compute a 3x3 rotation matrix.
* @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
*/
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
//! From 16-element array
explicit Affine3(const float_type* vals);
//! Create a 4x4 identity transform
static Affine3 Identity();
/**
* Rotation matrix.
*
* Copy the rotation matrix to the upper left 3x3 part of the current matrix.
* The remaining elements of the current matrix are not changed.
*
* @param R 3x3 rotation matrix.
*
*/
void rotation(const Mat3& R);
/**
* Rodrigues vector.
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
* its length indicates the rotation angle in radian (using the right hand rule).
*/
void rotation(const Vec3& rvec);
/**
* Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param data 1-channel matrix.
* When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
* When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
* is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
*/
void rotation(const Mat& data);
/**
* Copy the 3x3 matrix L to the upper left part of the current matrix
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param L 3x3 matrix.
*/
void linear(const Mat3& L);
/**
* Copy t to the first three elements of the last column of the current matrix
*
* It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
*
* @param t 3x1 translation vector.
*/
void translation(const Vec3& t);
//! @return the upper left 3x3 part
Mat3 rotation() const;
//! @return the upper left 3x3 part
Mat3 linear() const;
//! @return the upper right 3x1 part
Vec3 translation() const;
//! Rodrigues vector.
//! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
//! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
//! this function returns only one rotation vector that represents the current rotation matrix,
//! which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
Vec3 rvec() const;
//! @return the inverse of the current matrix.
Affine3 inv(int method = cv::DECOMP_SVD) const;
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
Affine3 rotate(const Mat3& R) const;
//! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
Affine3 rotate(const Vec3& rvec) const;
//! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
Affine3 translate(const Vec3& t) const;
//! a.concatenate(affine) is equivalent to affine * a;
Affine3 concatenate(const Affine3& affine) const;
template <typename Y> operator Affine3<Y>() const;
template <typename Y> Affine3<Y> cast() const;
Mat4 matrix;
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
operator Eigen::Transform<T, 3, Eigen::Affine>() const;
#endif
};
template<typename T> static
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
//! V is a 3-element vector with member fields x, y and z
template<typename T, typename V> static
V operator*(const Affine3<T>& affine, const V& vector);
typedef Affine3<float> Affine3f;
typedef Affine3<double> Affine3d;
static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
template<typename _Tp> class DataType< Affine3<_Tp> >
{
public:
typedef Affine3<_Tp> value_type;
typedef Affine3<typename DataType<_Tp>::work_type> work_type;
typedef _Tp channel_type;
enum { generic_type = 0,
channels = 16,
fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
,depth = DataType<channel_type>::depth
,type = CV_MAKETYPE(depth, channels)
#endif
};
typedef Vec<channel_type, channels> vec_type;
};
namespace traits {
template<typename _Tp>
struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
template<typename _Tp>
struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
} // namespace
//! @} core
}
//! @cond IGNORED
///////////////////////////////////////////////////////////////////////////////////
// Implementation
template<typename T> inline
cv::Affine3<T>::Affine3()
: matrix(Mat4::eye())
{}
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat4& affine)
: matrix(affine)
{}
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
{
rotation(R);
translation(t);
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
{
rotation(_rvec);
translation(t);
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
{
CV_Assert(data.type() == cv::traits::Type<T>::value);
CV_Assert(data.channels() == 1);
if (data.cols == 4 && data.rows == 4)
{
data.copyTo(matrix);
return;
}
else if (data.cols == 4 && data.rows == 3)
{
rotation(data(Rect(0, 0, 3, 3)));
translation(data(Rect(3, 0, 1, 3)));
}
else
{
rotation(data);
translation(t);
}
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
{}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::Identity()
{
return Affine3<T>(cv::Affine3<T>::Mat4::eye());
}
template<typename T> inline
void cv::Affine3<T>::rotation(const Mat3& R)
{
linear(R);
}
template<typename T> inline
void cv::Affine3<T>::rotation(const Vec3& _rvec)
{
double theta = norm(_rvec);
if (theta < DBL_EPSILON)
rotation(Mat3::eye());
else
{
double c = std::cos(theta);
double s = std::sin(theta);
double c1 = 1. - c;
double itheta = (theta != 0) ? 1./theta : 0.;
Point3_<T> r = _rvec*itheta;
Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );
// R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
// where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;
rotation(R);
}
}
// Combines the rotation methods above. Supports 3x3, 1x3 and 3x1 sizes of the data matrix.
template<typename T> inline
void cv::Affine3<T>::rotation(const cv::Mat& data)
{
CV_Assert(data.type() == cv::traits::Type<T>::value);
CV_Assert(data.channels() == 1);
if (data.cols == 3 && data.rows == 3)
{
Mat3 R;
data.copyTo(R);
rotation(R);
}
else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
{
Vec3 _rvec;
data.reshape(1, 3).copyTo(_rvec);
rotation(_rvec);
}
else
CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
}
template<typename T> inline
void cv::Affine3<T>::linear(const Mat3& L)
{
matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2];
matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5];
matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8];
}
template<typename T> inline
void cv::Affine3<T>::translation(const Vec3& t)
{
matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
}
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
{
return linear();
}
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
{
typename cv::Affine3<T>::Mat3 R;
R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2];
R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6];
R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10];
return R;
}
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
{
return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
}
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
{
cv::Vec3d w;
cv::Matx33d u, vt, R = rotation();
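// Re-orthogonalize R by projecting it onto the nearest rotation matrix via SVD
// (R <- U * Vt), which guards the axis-angle extraction below against numerical drift.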
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
R = u * vt;
double rx = R.val[7] - R.val[5];
double ry = R.val[2] - R.val[6];
double rz = R.val[3] - R.val[1];
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
double theta = acos(c);
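// Near the singularity sin(theta) ~ 0 the axis cannot be read off the skew part:
// theta ~ 0 yields the zero vector, while theta ~ pi recovers the axis from diag(R).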
if( s < 1e-5 )
{
if( c > 0 )
rx = ry = rz = 0;
else
{
double t;
t = (R.val[0] + 1) * 0.5;
rx = std::sqrt(std::max(t, 0.0));
t = (R.val[4] + 1) * 0.5;
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
t = (R.val[8] + 1) * 0.5;
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
rz = -rz;
theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
rx *= theta;
ry *= theta;
rz *= theta;
}
}
else
{
double vth = 1/(2*s);
vth *= theta;
rx *= vth; ry *= vth; rz *= vth;
}
return cv::Vec3d(rx, ry, rz);
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
{
return matrix.inv(method);
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
{
Mat3 Lc = linear();
Vec3 tc = translation();
Mat4 result;
result.val[12] = result.val[13] = result.val[14] = 0;
result.val[15] = 1;
for(int j = 0; j < 3; ++j)
{
for(int i = 0; i < 3; ++i)
{
float_type value = 0;
for(int k = 0; k < 3; ++k)
value += R(j, k) * Lc(k, i);
result(j, i) = value;
}
result(j, 3) = R.row(j).dot(tc.t());
}
return result;
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
{
return rotate(Affine3f(_rvec).rotation());
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
{
Mat4 m = matrix;
m.val[ 3] += t[0];
m.val[ 7] += t[1];
m.val[11] += t[2];
return m;
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
{
return (*this).rotate(affine.rotation()).translate(affine.translation());
}
template<typename T> template <typename Y> inline
cv::Affine3<T>::operator Affine3<Y>() const
{
return Affine3<Y>(matrix);
}
template<typename T> template <typename Y> inline
cv::Affine3<Y> cv::Affine3<T>::cast() const
{
return Affine3<Y>(matrix);
}
template<typename T> inline
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
{
return affine2.concatenate(affine1);
}
template<typename T, typename V> inline
V cv::operator*(const cv::Affine3<T>& affine, const V& v)
{
const typename Affine3<T>::Mat4& m = affine.matrix;
V r;
r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
return r;
}
static inline
cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
{
const cv::Matx44f& m = affine.matrix;
cv::Vec3f r;
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
return r;
}
static inline
cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
{
const cv::Matx44d& m = affine.matrix;
cv::Vec3d r;
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
return r;
}
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
{
cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
}
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
}
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
cv::Mat(matrix, false).copyTo(hdr);
return r;
}
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
{
return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
}
#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
//! @endcond
#endif /* __cplusplus */
#endif /* OPENCV_CORE_AFFINE3_HPP */

View File

@ -0,0 +1,707 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_BASE_HPP
#define OPENCV_CORE_BASE_HPP
#ifndef __cplusplus
# error base.hpp header must be compiled as C++
#endif
#include "opencv2/opencv_modules.hpp"
#include <climits>
#include <algorithm>
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
namespace cv
{
//! @addtogroup core_utils
//! @{
namespace Error {
//! error codes
enum Code {
StsOk= 0, //!< everything is ok
StsBackTrace= -1, //!< pseudo error for back trace
StsError= -2, //!< unknown /unspecified error
StsInternal= -3, //!< internal error (bad state)
StsNoMem= -4, //!< insufficient memory
StsBadArg= -5, //!< function arg/param is bad
StsBadFunc= -6, //!< unsupported function
StsNoConv= -7, //!< iteration didn't converge
StsAutoTrace= -8, //!< tracing
HeaderIsNull= -9, //!< image header is NULL
BadImageSize= -10, //!< image size is invalid
BadOffset= -11, //!< offset is invalid
BadDataPtr= -12, //!<
BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix.
BadModelOrChSeq= -14, //!<
BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices.
BadNumChannel1U= -16, //!<
BadDepth= -17, //!< input image depth is not supported by the function
BadAlphaChannel= -18, //!<
BadOrder= -19, //!< number of dimensions is out of range
BadOrigin= -20, //!< incorrect input origin
BadAlign= -21, //!< incorrect input align
BadCallBack= -22, //!<
BadTileSize= -23, //!<
BadCOI= -24, //!< input COI is not supported
BadROISize= -25, //!< incorrect input roi
MaskIsTiled= -26, //!<
StsNullPtr= -27, //!< null pointer
StsVecLengthErr= -28, //!< incorrect vector length
StsFilterStructContentErr= -29, //!< incorrect filter structure content
StsKernelStructContentErr= -30, //!< incorrect transform kernel content
StsFilterOffsetErr= -31, //!< incorrect filter offset value
StsBadSize= -201, //!< the input/output structure size is incorrect
StsDivByZero= -202, //!< division by zero
StsInplaceNotSupported= -203, //!< in-place operation is not supported
StsObjectNotFound= -204, //!< request can't be completed
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
StsBadFlag= -206, //!< flag is wrong or not supported
StsBadPoint= -207, //!< bad CvPoint
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
StsOutOfRange= -211, //!< some of parameters are out of range
StsParseError= -212, //!< invalid syntax/structure of the parsed file
StsNotImplemented= -213, //!< the requested function/feature is not implemented
StsBadMemBlock= -214, //!< an allocated block has been corrupted
StsAssert= -215, //!< assertion failed
GpuNotSupported= -216, //!< no CUDA support
GpuApiCallError= -217, //!< GPU API call error
OpenGlNotSupported= -218, //!< no OpenGL support
OpenGlApiCallError= -219, //!< OpenGL API call error
OpenCLApiCallError= -220, //!< OpenCL API call error
OpenCLDoubleNotSupported= -221,
OpenCLInitError= -222, //!< OpenCL initialization error
OpenCLNoAMDBlasFft= -223
};
} //Error
//! @} core_utils
//! @addtogroup core_array
//! @{
//! matrix decomposition types
enum DecompTypes {
/** Gaussian elimination with the optimal pivot element chosen. */
DECOMP_LU = 0,
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
src1 can be singular */
DECOMP_SVD = 1,
/** eigenvalue decomposition; the matrix src1 must be symmetric */
DECOMP_EIG = 2,
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetric and positive
definite */
DECOMP_CHOLESKY = 3,
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
DECOMP_QR = 4,
/** while all the previous flags are mutually exclusive, this flag can be used together with
any of the previous; it means that the normal equations
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
solved instead of the original system
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
DECOMP_NORMAL = 16
};
/** norm types
src1 and src2 denote input arrays.
*/
enum NormTypes {
/**
\f[
norm = \forkthree
{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
\f]
*/
NORM_INF = 1,
/**
\f[
norm = \forkthree
{\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
\f]*/
NORM_L1 = 2,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
\f]
*/
NORM_L2 = 4,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
\f]
*/
NORM_L2SQR = 5,
/**
In the case of one input array, calculates the Hamming distance of the array from zero.
In the case of two input arrays, calculates the Hamming distance between the arrays.
*/
NORM_HAMMING = 6,
/**
Similar to NORM_HAMMING, but in the calculation every two bits of the input sequence are
added together and treated as a single bit, which is then used in the same calculation as NORM_HAMMING.
*/
NORM_HAMMING2 = 7,
NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
NORM_RELATIVE = 8, //!< flag
NORM_MINMAX = 32 //!< flag
};
//! comparison types
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
CMP_GT = 1, //!< src1 is greater than src2.
CMP_GE = 2, //!< src1 is greater than or equal to src2.
CMP_LT = 3, //!< src1 is less than src2.
CMP_LE = 4, //!< src1 is less than or equal to src2.
CMP_NE = 5 //!< src1 is unequal to src2.
};
//! generalized matrix multiplication flags
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
GEMM_2_T = 2, //!< transposes src2
GEMM_3_T = 4 //!< transposes src3
};
enum DftFlags {
/** performs an inverse 1D or 2D transform instead of the default forward
transform. */
DFT_INVERSE = 1,
/** scales the result: divide it by the number of array elements. Normally, it is
combined with DFT_INVERSE. */
DFT_SCALE = 2,
/** performs a forward or inverse transform of every individual row of the input
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transformations and so forth.*/
DFT_ROWS = 4,
/** performs a forward transformation of 1D or 2D real array; the result,
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
description below for details), and such an array can be packed into a real array of the same
size as input, which is the fastest option and which is what the function does by default;
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
pass the flag to enable the function to produce a full-size complex output array. */
DFT_COMPLEX_OUTPUT = 16,
/** performs an inverse transformation of a 1D or 2D complex array; the
result is normally a complex array of the same size, however, if the input array has
conjugate-complex symmetry (for example, it is a result of forward transformation with
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
check whether the input is symmetrical or not, you can pass the flag and then the function
will assume the symmetry and produce the real output array (note that when the input is packed
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT = 32,
/** specifies that the input is complex. If this flag is set, the input must have 2 channels.
On the other hand, for backward compatibility reasons, if the input has 2 channels, it is
already considered complex. */
DFT_COMPLEX_INPUT = 64,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE = DFT_INVERSE,
/** performs a forward or inverse transform of every individual row of the input
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transforms and so forth.*/
DCT_ROWS = DFT_ROWS
};
//! Various border types, image boundaries are denoted with `|`
//! @see borderInterpolate, copyMakeBorder
enum BorderTypes {
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_ISOLATED = 16 //!< do not look outside of ROI
};
//! @} core_array
//! @addtogroup core_utils
//! @{
/*! @brief Signals an error and raises the exception.
By default the function prints information about the error to stderr,
then it either stops if setBreakOnError() had been called before or raises the exception.
It is possible to alter error processing by using redirectError().
@param _code - error code (Error::Code)
@param _err - error description
@param _func - function name. Available only when the compiler supports getting it
@param _file - source file name where the error has occurred
@param _line - line number in the source file where the error has occurred
@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
*/
CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Winvalid-noreturn"
# endif
#endif
/** same as cv::error, but does not return */
CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
{
error(_code, _err, _func, _file, _line);
#ifdef __GNUC__
# if !defined __clang__ && !defined __APPLE__
// this suppresses this warning: "noreturn" function does return [enabled by default]
__builtin_trap();
// or use infinite loop: for (;;) {}
# endif
#endif
}
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic pop
# endif
#endif
#ifdef CV_STATIC_ANALYSIS
// In practice, some macros are not processed correctly (noreturn is not detected).
// We need to use simplified definitions for them.
#define CV_Error(...) do { abort(); } while (0)
#define CV_Error_( code, args ) do { cv::format args; abort(); } while (0)
#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
#define CV_ErrorNoReturn CV_Error
#define CV_ErrorNoReturn_ CV_Error_
#else // CV_STATIC_ANALYSIS
/** @brief Call the error handler.
Currently, the error handler prints the error code and the error message to the standard
error stream `stderr`. In the Debug configuration, it then provokes a memory access violation, so that
the execution stack and all the parameters can be analyzed by the debugger. In the Release
configuration, the exception is thrown.
@param code one of Error::Code
@param msg error message
*/
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
/** @brief Call the error handler.
This macro can be used to construct an error message on-fly to include some dynamic information,
for example:
@code
// note the extra parentheses around the formatted text message
CV_Error_(Error::StsOutOfRange,
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
@endcode
@param code one of Error::Code
@param args printf-like formatted error message in parentheses
*/
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
/** @brief Checks a condition at runtime and throws exception if it fails
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
configurations while CV_DbgAssert is only retained in the Debug configuration.
*/
#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
//! @cond IGNORED
#define CV__ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
#define CV__ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
#ifdef __OPENCV_BUILD
#undef CV_Error
#define CV_Error CV__ErrorNoReturn
#undef CV_Error_
#define CV_Error_ CV__ErrorNoReturn_
#undef CV_Assert
#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::errorNoReturn( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
#else
// backward compatibility
#define CV_ErrorNoReturn CV__ErrorNoReturn
#define CV_ErrorNoReturn_ CV__ErrorNoReturn_
#endif
//! @endcond
#endif // CV_STATIC_ANALYSIS
//! @cond IGNORED
#if defined OPENCV_FORCE_MULTIARG_ASSERT_CHECK && defined CV_STATIC_ANALYSIS
#warning "OPENCV_FORCE_MULTIARG_ASSERT_CHECK can't be used with CV_STATIC_ANALYSIS"
#undef OPENCV_FORCE_MULTIARG_ASSERT_CHECK
#endif
#ifdef OPENCV_FORCE_MULTIARG_ASSERT_CHECK
#define CV_Assert_1( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
#else
#define CV_Assert_1 CV_Assert
#endif
#define CV_Assert_2( expr1, expr2 ) CV_Assert_1(expr1); CV_Assert_1(expr2)
#define CV_Assert_3( expr1, expr2, expr3 ) CV_Assert_2(expr1, expr2); CV_Assert_1(expr3)
#define CV_Assert_4( expr1, expr2, expr3, expr4 ) CV_Assert_3(expr1, expr2, expr3); CV_Assert_1(expr4)
#define CV_Assert_5( expr1, expr2, expr3, expr4, expr5 ) CV_Assert_4(expr1, expr2, expr3, expr4); CV_Assert_1(expr5)
#define CV_Assert_6( expr1, expr2, expr3, expr4, expr5, expr6 ) CV_Assert_5(expr1, expr2, expr3, expr4, expr5); CV_Assert_1(expr6)
#define CV_Assert_7( expr1, expr2, expr3, expr4, expr5, expr6, expr7 ) CV_Assert_6(expr1, expr2, expr3, expr4, expr5, expr6 ); CV_Assert_1(expr7)
#define CV_Assert_8( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ) CV_Assert_7(expr1, expr2, expr3, expr4, expr5, expr6, expr7 ); CV_Assert_1(expr8)
#define CV_Assert_9( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ) CV_Assert_8(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ); CV_Assert_1(expr9)
#define CV_Assert_10( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10 ) CV_Assert_9(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ); CV_Assert_1(expr10)
#define CV_Assert_N(...) do { __CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__); } while(0)
#ifdef OPENCV_FORCE_MULTIARG_ASSERT_CHECK
#undef CV_Assert
#define CV_Assert CV_Assert_N
#endif
//! @endcond
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
# define CV_DbgAssert(expr) CV_Assert(expr)
#else
/** replaced with CV_Assert(expr) in Debug configuration */
# define CV_DbgAssert(expr)
#endif
/*
 * Hamming distance functor - counts the bit differences between two strings - useful for the BRIEF descriptor.
 * The distance is the bit count of A exclusive-OR'ed with B.
*/
struct CV_EXPORTS Hamming
{
enum { normType = NORM_HAMMING };
typedef unsigned char ValueType;
typedef int ResultType;
/** this will count the bits in a ^ b
*/
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
};
typedef Hamming HammingLUT;
/////////////////////////////////// inline norms ////////////////////////////////////
template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
inline int cv_abs(uchar x) { return x; }
inline int cv_abs(schar x) { return std::abs(x); }
inline int cv_abs(ushort x) { return x; }
inline int cv_abs(short x) { return std::abs(x); }
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, int n)
{
_AccTp s = 0;
int i=0;
#if CV_ENABLE_UNROLLED
for( ; i <= n - 4; i += 4 )
{
_AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
}
#endif
for( ; i < n; i++ )
{
_AccTp v = a[i];
s += v*v;
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, int n)
{
_AccTp s = 0;
int i = 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
(_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
}
#endif
for( ; i < n; i++ )
s += cv_abs(a[i]);
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, int n)
{
_AccTp s = 0;
for( int i = 0; i < n; i++ )
s = std::max(s, (_AccTp)cv_abs(a[i]));
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
int i= 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
}
#endif
for( ; i < n; i++ )
{
_AccTp v = _AccTp(a[i] - b[i]);
s += v*v;
}
return s;
}
static inline float normL2Sqr(const float* a, const float* b, int n)
{
float s = 0.f;
for( int i = 0; i < n; i++ )
{
float v = a[i] - b[i];
s += v*v;
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
int i= 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
}
#endif
for( ; i < n; i++ )
{
_AccTp v = _AccTp(a[i] - b[i]);
s += std::abs(v);
}
return s;
}
inline float normL1(const float* a, const float* b, int n)
{
float s = 0.f;
for( int i = 0; i < n; i++ )
{
s += std::abs(a[i] - b[i]);
}
return s;
}
inline int normL1(const uchar* a, const uchar* b, int n)
{
int s = 0;
for( int i = 0; i < n; i++ )
{
s += std::abs(a[i] - b[i]);
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
for( int i = 0; i < n; i++ )
{
_AccTp v0 = a[i] - b[i];
s = std::max(s, std::abs(v0));
}
return s;
}
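// Editorial usage sketch: the caller picks the accumulator type explicitly,
// e.g. accumulating float differences into double.
//
//     float a[4] = {1, 2, 3, 4}, b[4] = {1, 1, 1, 1};
//     double l2sq = cv::normL2Sqr<float, double>(a, b, 4);  // 0+1+4+9 = 14
//     double l1   = cv::normL1<float, double>(a, b, 4);     // 0+1+2+3 = 6
//     double linf = cv::normInf<float, double>(a, b, 4);    // max diff = 3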
/** @brief Computes the cube root of an argument.
The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
single-precision data.
@param val The value whose cube root is to be computed.
*/
CV_EXPORTS_W float cubeRoot(float val);
/** @brief Calculates the angle of a 2D vector in degrees.
The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
@param x x-coordinate of the vector.
@param y y-coordinate of the vector.
*/
CV_EXPORTS_W float fastAtan2(float y, float x);
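// Editorial usage sketch:
//
//     float r = cv::cubeRoot(-27.0f);       // -3.0f; negative inputs are valid
//     float a = cv::fastAtan2(1.0f, 1.0f);  // ~45 degrees (accuracy ~0.3 deg)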
/** proxy for hal::LU */
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::LU */
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
/** proxy for hal::Cholesky */
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::Cholesky */
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
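// Editorial usage sketch for the LU proxy, solving A*x = b in place; following
// the usual HAL convention the steps are row strides in bytes (an assumption
// worth verifying against hal::LU). On success b is overwritten with x.
//
//     double A[9] = { 4, 1, 0,  1, 3, 0,  0, 0, 2 };
//     double b[3] = { 1, 2, 3 };
//     int ret = cv::LU(A, 3 * sizeof(double), 3, b, sizeof(double), 3);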
////////////////// forward declarations for important OpenCV types //////////////////
//! @cond IGNORED
template<typename _Tp, int cn> class Vec;
template<typename _Tp, int m, int n> class Matx;
template<typename _Tp> class Complex;
template<typename _Tp> class Point_;
template<typename _Tp> class Point3_;
template<typename _Tp> class Size_;
template<typename _Tp> class Rect_;
template<typename _Tp> class Scalar_;
class CV_EXPORTS RotatedRect;
class CV_EXPORTS Range;
class CV_EXPORTS TermCriteria;
class CV_EXPORTS KeyPoint;
class CV_EXPORTS DMatch;
class CV_EXPORTS RNG;
class CV_EXPORTS Mat;
class CV_EXPORTS MatExpr;
class CV_EXPORTS UMat;
class CV_EXPORTS SparseMat;
typedef Mat MatND;
template<typename _Tp> class Mat_;
template<typename _Tp> class SparseMat_;
class CV_EXPORTS MatConstIterator;
class CV_EXPORTS SparseMatIterator;
class CV_EXPORTS SparseMatConstIterator;
template<typename _Tp> class MatIterator_;
template<typename _Tp> class MatConstIterator_;
template<typename _Tp> class SparseMatIterator_;
template<typename _Tp> class SparseMatConstIterator_;
namespace ogl
{
class CV_EXPORTS Buffer;
class CV_EXPORTS Texture2D;
class CV_EXPORTS Arrays;
}
namespace cuda
{
class CV_EXPORTS GpuMat;
class CV_EXPORTS HostMem;
class CV_EXPORTS Stream;
class CV_EXPORTS Event;
}
namespace cudev
{
template <typename _Tp> class GpuMat_;
}
namespace ipp
{
#if OPENCV_ABI_COMPATIBILITY > 300
CV_EXPORTS unsigned long long getIppFeatures();
#else
CV_EXPORTS int getIppFeatures();
#endif
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
int line = 0);
CV_EXPORTS int getIppStatus();
CV_EXPORTS String getIppErrorLocation();
CV_EXPORTS_W bool useIPP();
CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NotExact();
CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
#if OPENCV_ABI_COMPATIBILITY < 400
CV_EXPORTS_W bool useIPP_NE();
CV_EXPORTS_W void setUseIPP_NE(bool flag);
#endif
} // ipp
//! @endcond
//! @} core_utils
} // cv
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
#include "opencv2/core/check.hpp"
#endif //OPENCV_CORE_BASE_HPP

View File

@ -0,0 +1,23 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
#define OPENCV_CORE_BINDINGS_UTILS_HPP
namespace cv { namespace utils {
//! @addtogroup core_utils
//! @{
CV_EXPORTS_W String dumpInputArray(InputArray argument);
CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);
CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);
CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);
//! @}
}} // namespace
#endif // OPENCV_CORE_BINDINGS_UTILS_HPP

View File

@ -0,0 +1,40 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#ifndef OPENCV_CORE_BUFFER_POOL_HPP
#define OPENCV_CORE_BUFFER_POOL_HPP
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4265)
#endif
namespace cv
{
//! @addtogroup core
//! @{
class BufferPoolController
{
protected:
~BufferPoolController() { }
public:
virtual size_t getReservedSize() const = 0;
virtual size_t getMaxReservedSize() const = 0;
virtual void setMaxReservedSize(size_t size) = 0;
virtual void freeAllReservedBuffers() = 0;
};
//! @}
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#endif // OPENCV_CORE_BUFFER_POOL_HPP

View File

@ -0,0 +1,157 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_CHECK_HPP
#define OPENCV_CORE_CHECK_HPP
#include <opencv2/core/base.hpp>
namespace cv {
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
CV_EXPORTS const char* depthToString(int depth);
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
CV_EXPORTS const String typeToString(int type);
//! @cond IGNORED
namespace detail {
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
CV_EXPORTS const char* depthToString_(int depth);
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */
CV_EXPORTS const cv::String typeToString_(int type);
enum TestOp {
TEST_CUSTOM = 0,
TEST_EQ = 1,
TEST_NE = 2,
TEST_LE = 3,
TEST_LT = 4,
TEST_GE = 5,
TEST_GT = 6,
CV__LAST_TEST_OP
};
struct CheckContext {
const char* func;
const char* file;
int line;
enum TestOp testOp;
const char* message;
const char* p1_str;
const char* p2_str;
};
#ifndef CV__CHECK_FILENAME
# define CV__CHECK_FILENAME __FILE__
#endif
#ifndef CV__CHECK_FUNCTION
# if defined _MSC_VER
# define CV__CHECK_FUNCTION __FUNCSIG__
# elif defined __GNUC__
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
# else
# define CV__CHECK_FUNCTION "<unknown>"
# endif
#endif
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
if(CV__TEST_##op((v1), (v2))) ; else { \
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
} \
} while (0)
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
if(!!(test_expr)) ; else { \
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
} \
} while (0)
} // namespace
//! @endcond
/// Supported argument types: int, float, double
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
/// Check with additional "decoding" of type values in error message
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
/// Check with additional "decoding" of depth values in error message
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
/// Example: type == CV_8UC1 || type == CV_8UC3
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
/// Example: depth == CV_32F || depth == CV_64F
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
/// Example: v == A || v == B
#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
// TODO define pretty-printers
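// Editorial usage sketch: a failed check raises cv::Exception carrying the
// stringified expressions, decoded type names and the user message.
//
//     void process(const cv::Mat& img)
//     {
//         CV_CheckTypeEQ(img.type(), CV_8UC3, "process() expects a BGR image");
//         CV_CheckGT(img.rows, 0, "input must be non-empty");
//     }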
#ifndef NDEBUG
#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
#else
#define CV_DbgCheck(v, test_expr, msg) do { } while (0)
#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0)
#define CV_DbgCheckNE(v1, v2, msg) do { } while (0)
#define CV_DbgCheckLE(v1, v2, msg) do { } while (0)
#define CV_DbgCheckLT(v1, v2, msg) do { } while (0)
#define CV_DbgCheckGE(v1, v2, msg) do { } while (0)
#define CV_DbgCheckGT(v1, v2, msg) do { } while (0)
#endif
} // namespace
#endif // OPENCV_CORE_CHECK_HPP

View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/core.hpp"

View File

@ -0,0 +1,631 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDAINL_HPP
#define OPENCV_CORE_CUDAINL_HPP
#include "opencv2/core/cuda.hpp"
//! @cond IGNORED
namespace cv { namespace cuda {
//===================================================================================
// GpuMat
//===================================================================================
inline
GpuMat::GpuMat(Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(const GpuMat& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
{
if (refcount)
CV_XADD(refcount, 1);
}
inline
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
upload(arr);
}
inline
GpuMat::~GpuMat()
{
release();
}
inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
if (this != &m)
{
GpuMat temp(m);
swap(temp);
}
return *this;
}
inline
void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
void GpuMat::swap(GpuMat& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(allocator, b.allocator);
}
inline
GpuMat GpuMat::clone() const
{
GpuMat m;
copyTo(m);
return m;
}
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
copyTo(dst, mask, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s)
{
return setTo(s, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
return setTo(s, mask, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
convertTo(dst, rtype, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
convertTo(dst, rtype, alpha, beta, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
convertTo(dst, rtype, alpha, 0.0, stream);
}
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
if (_type < 0)
m = *this;
else
convertTo(m, _type);
}
inline
uchar* GpuMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
inline
const uchar* GpuMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
inline
GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline
GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline
GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline
GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
return GpuMat(*this, rowRange_, colRange_);
}
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline
bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline
Size GpuMat::size() const
{
return Size(cols, rows);
}
inline
bool GpuMat::empty() const
{
return data == 0;
}
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
createContinuous(size.height, size.width, type, arr);
}
static inline
GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
ensureSizeIsEnough(size.height, size.width, type, arr);
}
static inline
void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
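// Editorial usage sketch: createContinuous guarantees one padding-free
// allocation, which some kernels and downloads rely on.
//
//     cv::cuda::GpuMat m = cv::cuda::createContinuous(480, 640, CV_32FC1);
//     CV_Assert(m.isContinuous());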
//===================================================================================
// HostMem
//===================================================================================
inline
HostMem::HostMem(AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}
inline
HostMem::HostMem(const HostMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
arr.getMat().copyTo(*this);
}
inline
HostMem::~HostMem()
{
release();
}
inline
HostMem& HostMem::operator =(const HostMem& m)
{
if (this != &m)
{
HostMem temp(m);
swap(temp);
}
return *this;
}
inline
void HostMem::swap(HostMem& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(alloc_type, b.alloc_type);
}
inline
HostMem HostMem::clone() const
{
HostMem m(size(), type(), alloc_type);
createMatHeader().copyTo(m);
return m;
}
inline
void HostMem::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
Mat HostMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
inline
bool HostMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t HostMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t HostMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int HostMem::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int HostMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int HostMem::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t HostMem::step1() const
{
return step / elemSize1();
}
inline
Size HostMem::size() const
{
return Size(cols, rows);
}
inline
bool HostMem::empty() const
{
return data == 0;
}
static inline
void swap(HostMem& a, HostMem& b)
{
a.swap(b);
}
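// Editorial usage sketch: page-locked host memory speeds up host<->device
// transfers, and createMatHeader() exposes it as a zero-copy cv::Mat view.
//
//     cv::cuda::HostMem hm(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
//     cv::Mat view = hm.createMatHeader();  // header only, no data copy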
//===================================================================================
// Stream
//===================================================================================
inline
Stream::Stream(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//===================================================================================
// Event
//===================================================================================
inline
Event::Event(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//===================================================================================
// Initialization & Info
//===================================================================================
inline
bool TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}
inline
DeviceInfo::DeviceInfo()
{
device_id_ = getDevice();
}
inline
DeviceInfo::DeviceInfo(int device_id)
{
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
inline
size_t DeviceInfo::freeMemory() const
{
size_t _totalMemory = 0, _freeMemory = 0;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}
inline
size_t DeviceInfo::totalMemory() const
{
size_t _totalMemory = 0, _freeMemory = 0;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
int version = majorVersion() * 10 + minorVersion();
return version >= feature_set;
}
}} // namespace cv { namespace cuda {
//===================================================================================
// Mat
//===================================================================================
namespace cv {
inline
Mat::Mat(const cuda::GpuMat& m)
: flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
{
m.download(*this);
}
}
//! @endcond
#endif // OPENCV_CORE_CUDAINL_HPP

View File

@ -0,0 +1,211 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
#define OPENCV_CUDA_DEVICE_BLOCK_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Block
{
static __device__ __forceinline__ unsigned int id()
{
return blockIdx.x;
}
static __device__ __forceinline__ unsigned int stride()
{
return blockDim.x * blockDim.y * blockDim.z;
}
static __device__ __forceinline__ void sync()
{
__syncthreads();
}
static __device__ __forceinline__ int flattenedThreadId()
{
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
int STRIDE = stride();
It t = beg + flattenedThreadId();
for(; t < end; t += STRIDE)
*t = value;
}
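// Note: "yota" is the historical OpenCV spelling of an iota-style fill; it
// writes value, value+1, value+2, ... across [beg, end), distributed over
// the block's threads.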
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
int STRIDE = stride();
int tid = flattenedThreadId();
value += tid;
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = *t;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = op(*t);
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
int STRIDE = stride();
InIt1 t1 = beg1 + flattenedThreadId();
InIt2 t2 = beg2 + flattenedThreadId();
OutIt o = out + (t1 - beg1);
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
*o = op(*t1, *t2);
}
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid];
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
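// The final 32 values are reduced without __syncthreads(), relying on
// legacy warp-synchronous execution within a single warp.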
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
}
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid] = init;
__syncthreads();
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
__syncthreads();
return buffer[0];
}
template <typename T, class BinOp>
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
{
int ftid = flattenedThreadId();
int sft = stride();
if (sft < n)
{
for (unsigned int i = sft + ftid; i < n; i += sft)
data[ftid] = op(data[ftid], data[i]);
__syncthreads();
n = sft;
}
while (n > 1)
{
unsigned int half = n/2;
if (ftid < half)
data[ftid] = op(data[ftid], data[n - ftid - 1]);
__syncthreads();
n = n - half;
}
}
};
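// Editorial usage sketch: a block-wide sum reduction inside a kernel; CTA_SIZE
// must match the launch configuration (here 256 threads per block).
//
//     struct Plus { __device__ float operator()(float a, float b) const { return a + b; } };
//
//     __global__ void blockSum(const float* in, float* out)
//     {
//         __shared__ float buf[256];
//         buf[threadIdx.x] = in[blockIdx.x * 256 + threadIdx.x];
//         __syncthreads();
//         cv::cuda::device::Block::reduce<256>(buf, Plus());
//         if (threadIdx.x == 0) out[blockIdx.x] = buf[0];
//     }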
}}}
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */

View File

@ -0,0 +1,722 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
//////////////////////////////////////////////////////////////
// BrdConstant
template <typename D> struct BrdRowConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return x >= 0 ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return x < width ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
}
int width;
D val;
};
template <typename D> struct BrdColConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
int height;
D val;
};
template <typename D> struct BrdConstant
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
{
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
int height;
int width;
D val;
};
//////////////////////////////////////////////////////////////
// BrdReplicate
template <typename D> struct BrdRowReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReplicate
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect101
template <typename D> struct BrdRowReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReflect101
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect
template <typename D> struct BrdRowReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(::abs(x) - (x < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(::abs(y) - (y < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReflect
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (last_col - ::abs(last_col - x) + (x > last_col));
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdWrap
template <typename D> struct BrdRowWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
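// Branchless wrap-around: for x < 0 the second term adds the smallest
// multiple of width that brings the index into [0, width); x >= width is
// handled by the plain modulo in idx_col_high.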
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) * x + (x >= width) * (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int width;
};
template <typename D> struct BrdColWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) * y + (y >= height) * (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int height;
};
template <typename D> struct BrdWrap
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
height(height_), width(width_)
{
}
template <typename U>
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
height(height_), width(width_)
{
}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) ? y : (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) ? x : (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int height;
int width;
};
//////////////////////////////////////////////////////////////
// BorderReader
template <typename Ptr2D, typename B> struct BorderReader
{
typedef typename B::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
{
return b.at(y, x, ptr);
}
Ptr2D ptr;
B b;
};
// Under Win32 there is a bug with templated types that are passed as kernel parameters;
// with this specialization everything works fine.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
typedef typename BrdConstant<D>::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
src(src_), height(b.height), width(b.width), val(b.val)
{
}
__device__ __forceinline__ D operator ()(index_type y, index_type x) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
Ptr2D src;
int height;
int width;
D val;
};
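// Editorial usage sketch: a border policy composed with a pointer-like source;
// PtrStep<float> here is assumed to wrap device memory of size rows x cols.
//
//     cv::cuda::device::BrdReplicate<float> brd(rows, cols);
//     cv::cuda::device::BorderReader< cv::cuda::PtrStep<float>,
//                                     cv::cuda::device::BrdReplicate<float> > src(img, brd);
//     float v = src(-3, 10000);  // out-of-range indices are clamped, never OOB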
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP

View File

@ -0,0 +1,309 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_COLOR_HPP
#define OPENCV_CUDA_COLOR_HPP
#include "detail/color_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Each OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macro implements
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
// {
// typedef ... functor_type;
// static __host__ __device__ functor_type create_functor();
// };
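// For example, a device-side BGR-to-gray conversion of a single pixel could
// look like this (an illustrative sketch, assuming the uchar instantiation):
//   bgr_to_gray_traits<uchar>::functor_type cvt =
//       bgr_to_gray_traits<uchar>::create_functor();
//   uchar gray = cvt(pix); // pix is a uchar3 holding B, G, R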
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_COLOR_HPP


@@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_COMMON_HPP
#define OPENCV_CUDA_COMMON_HPP
#include <cuda_runtime.h>
#include "opencv2/core/cuda_types.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
#ifndef CV_PI_F
#ifndef CV_PI
#define CV_PI_F 3.14159265f
#else
#define CV_PI_F ((float)CV_PI)
#endif
#endif
namespace cv { namespace cuda {
static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
{
if (cudaSuccess != err)
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
}
}}
#ifndef cudaSafeCall
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
#endif
namespace cv { namespace cuda
{
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
}
static inline bool isAligned(size_t step, size_t size)
{
return step % size == 0;
}
}}
namespace cv { namespace cuda
{
namespace device
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
}
}}
//! @endcond
#endif // OPENCV_CUDA_COMMON_HPP
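A typical launch pattern built on divUp and cudaSafeCall, as a hedged sketch; fillKernel and fill are illustrative names, not part of this header:
__global__ void fillKernel(cv::cuda::PtrStepSzf img, float v)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < img.cols && y < img.rows)
        img(y, x) = v;
}
void fill(cv::cuda::PtrStepSzf img, float v, cudaStream_t stream)
{
    const dim3 block(32, 8);
    const dim3 grid(cv::cuda::device::divUp(img.cols, block.x),
                    cv::cuda::device::divUp(img.rows, block.y));
    fillKernel<<<grid, block, 0, stream>>>(img, v);
    cudaSafeCall( cudaGetLastError() );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );  // blocking on the default stream
}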


@@ -0,0 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// on Fermi and newer, the memory space is detected automatically
template <typename T> struct ForceGlob
{
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "l"
#else
// 32-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "r"
#endif
template<class T> struct ForceGlob;
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
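A sketch of how a kernel consumes ForceGlob; on sm_20 and newer the generic template above reduces to a plain indexed load, while older targets pick the PTX specializations (scaleKernel is an illustrative name):
__global__ void scaleKernel(const float* src, float* dst, int n, float s)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        float v;
        cv::cuda::device::ForceGlob<float>::Load(src, i, v);  // v = src[i], forced global read
        dst[i] = v * s;
    }
}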


@@ -0,0 +1,365 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
#define OPENCV_CUDA_REDUCE_DETAIL_HPP
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_detail
{
template <typename T> struct GetType;
template <typename T> struct GetType<T*>
{
typedef T type;
};
template <typename T> struct GetType<volatile T*>
{
typedef T type;
};
template <typename T> struct GetType<T&>
{
typedef T type;
};
template <unsigned int I, unsigned int N>
struct For
{
template <class PointerTuple, class ValTuple>
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get<I>(smem)[tid] = thrust::get<I>(val);
For<I + 1, N>::loadToSmem(smem, val, tid);
}
template <class PointerTuple, class ValTuple>
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get<I>(val) = thrust::get<I>(smem)[tid];
For<I + 1, N>::loadFromSmem(smem, val, tid);
}
template <class PointerTuple, class ValTuple, class OpTuple>
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
{
typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
For<I + 1, N>::merge(smem, val, tid, delta, op);
}
template <class ValTuple, class OpTuple>
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
{
typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
For<I + 1, N>::mergeShfl(val, delta, width, op);
}
};
template <unsigned int N>
struct For<N, N>
{
template <class PointerTuple, class ValTuple>
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template <class PointerTuple, class ValTuple>
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template <class PointerTuple, class ValTuple, class OpTuple>
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
template <class ValTuple, class OpTuple>
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
};
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
{
smem[tid] = val;
}
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
{
val = smem[tid];
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
}
template <typename T, class Op>
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
{
T reg = smem[tid + delta];
smem[tid] = val = op(val, reg);
}
template <typename T, class Op>
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
{
T reg = shfl_down(val, delta, width);
val = op(val, reg);
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid,
unsigned int delta,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
}
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int delta,
unsigned int width,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
}
template <unsigned int N> struct Generic
{
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
loadToSmem(smem, val, tid);
if (N >= 32)
__syncthreads();
if (N >= 2048)
{
if (tid < 1024)
merge(smem, val, tid, 1024, op);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(smem, val, tid, 512, op);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(smem, val, tid, 256, op);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(smem, val, tid, 128, op);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(smem, val, tid, 64, op);
__syncthreads();
}
if (N >= 64)
{
if (tid < 32)
merge(smem, val, tid, 32, op);
}
if (tid < 16)
{
merge(smem, val, tid, 16, op);
merge(smem, val, tid, 8, op);
merge(smem, val, tid, 4, op);
merge(smem, val, tid, 2, op);
merge(smem, val, tid, 1, op);
}
}
};
template <unsigned int I, typename Pointer, typename Reference, class Op>
struct Unroll
{
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
{
mergeShfl(val, I, N, op);
Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
}
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
{
merge(smem, val, tid, I, op);
Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
}
};
template <typename Pointer, typename Reference, class Op>
struct Unroll<0, Pointer, Reference, Op>
{
static __device__ void loopShfl(Reference, Op, unsigned int)
{
}
static __device__ void loop(Pointer, Reference, unsigned int, Op)
{
}
};
template <unsigned int N> struct WarpOptimized
{
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
CV_UNUSED(smem);
CV_UNUSED(tid);
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
#else
loadToSmem(smem, val, tid);
if (tid < N / 2)
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
};
template <unsigned int N> struct GenericOptimized32
{
enum { M = N / 32 };
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
const unsigned int laneId = Warp::laneId();
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#else
loadToSmem(smem, val, tid);
if (laneId < 16)
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
__syncthreads();
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#endif
__syncthreads();
loadFromSmem(smem, val, tid);
if (tid < 32)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
#else
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
}
};
template <bool val, class T1, class T2> struct StaticIf;
template <class T1, class T2> struct StaticIf<true, T1, T2>
{
typedef T1 type;
};
template <class T1, class T2> struct StaticIf<false, T1, T2>
{
typedef T2 type;
};
template <unsigned int N> struct IsPowerOf2
{
enum { value = ((N != 0) && !(N & (N - 1))) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
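The public reduce.hpp wrapper reaches this code through Dispatcher<N>::reductor. A minimal block-sum sketch in the same style; SumOp and blockSum are illustrative names, and N must equal the block size:
struct SumOp
{
    __device__ __forceinline__ float operator ()(float a, float b) const { return a + b; }
};
template <unsigned int N>
__device__ __forceinline__ float blockSum(float val, unsigned int tid)
{
    __shared__ float smem[N];
    cv::cuda::device::reduce_detail::Dispatcher<N>::reductor
        ::template reduce<volatile float*, float&, const SumOp&>(smem, val, tid, SumOp());
    return val;  // thread 0 holds the block-wide sum
}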


@@ -0,0 +1,502 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_key_val_detail
{
template <typename T> struct GetType;
template <typename T> struct GetType<T*>
{
typedef T type;
};
template <typename T> struct GetType<volatile T*>
{
typedef T type;
};
template <typename T> struct GetType<T&>
{
typedef T type;
};
template <unsigned int I, unsigned int N>
struct For
{
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
{
thrust::get<I>(smem)[tid] = thrust::get<I>(data);
For<I + 1, N>::loadToSmem(smem, data, tid);
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
{
thrust::get<I>(data) = thrust::get<I>(smem)[tid];
For<I + 1, N>::loadFromSmem(smem, data, tid);
}
template <class ReferenceTuple>
static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
{
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
For<I + 1, N>::copyShfl(val, delta, width);
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
{
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
For<I + 1, N>::copy(svals, val, tid, delta);
}
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
{
typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
{
thrust::get<I>(key) = reg;
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
}
For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
}
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
const ValPointerTuple& svals, const ValReferenceTuple& val,
const CmpTuple& cmp,
unsigned int tid, unsigned int delta)
{
typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
{
thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
}
For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
}
};
template <unsigned int N>
struct For<N, N>
{
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
{
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
{
}
template <class ReferenceTuple>
static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
{
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
{
}
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
{
}
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
const ValPointerTuple&, const ValReferenceTuple&,
const CmpTuple&,
unsigned int, unsigned int)
{
}
};
//////////////////////////////////////////////////////
// loadToSmem
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
{
smem[tid] = data;
}
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
{
data = smem[tid];
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
}
//////////////////////////////////////////////////////
// copyVals
template <typename V>
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
{
val = shfl_down(val, delta, width);
}
template <typename V>
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
{
svals[tid] = val = svals[tid + delta];
}
template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int delta,
int width)
{
For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid, unsigned int delta)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
}
//////////////////////////////////////////////////////
// merge
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
{
K reg = shfl_down(key, delta, width);
if (cmp(reg, key))
{
key = reg;
copyValsShfl(val, delta, width);
}
}
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
{
K reg = skeys[tid + delta];
if (cmp(reg, key))
{
skeys[tid] = key = reg;
copyVals(svals, val, tid, delta);
}
}
template <typename K,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void mergeShfl(K& key,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const Cmp& cmp,
unsigned int delta, int width)
{
K reg = shfl_down(key, delta, width);
if (cmp(reg, key))
{
key = reg;
copyValsShfl(val, delta, width);
}
}
template <typename K,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const Cmp& cmp, unsigned int tid, unsigned int delta)
{
K reg = skeys[tid + delta];
if (cmp(reg, key))
{
skeys[tid] = key = reg;
copyVals(svals, val, tid, delta);
}
}
template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
unsigned int delta, int width)
{
For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
}
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
unsigned int tid, unsigned int delta)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
}
//////////////////////////////////////////////////////
// Generic
template <unsigned int N> struct Generic
{
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (N >= 32)
__syncthreads();
if (N >= 2048)
{
if (tid < 1024)
merge(skeys, key, svals, val, cmp, tid, 1024);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(skeys, key, svals, val, cmp, tid, 512);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(skeys, key, svals, val, cmp, tid, 256);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(skeys, key, svals, val, cmp, tid, 128);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(skeys, key, svals, val, cmp, tid, 64);
__syncthreads();
}
if (N >= 64)
{
if (tid < 32)
merge(skeys, key, svals, val, cmp, tid, 32);
}
if (tid < 16)
{
merge(skeys, key, svals, val, cmp, tid, 16);
merge(skeys, key, svals, val, cmp, tid, 8);
merge(skeys, key, svals, val, cmp, tid, 4);
merge(skeys, key, svals, val, cmp, tid, 2);
merge(skeys, key, svals, val, cmp, tid, 1);
}
}
};
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
struct Unroll
{
static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
{
mergeShfl(key, val, cmp, I, N);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
}
static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
merge(skeys, key, svals, val, cmp, tid, I);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
}
};
template <class KP, class KR, class VP, class VR, class Cmp>
struct Unroll<0, KP, KR, VP, VR, Cmp>
{
static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
{
}
static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
{
}
};
template <unsigned int N> struct WarpOptimized
{
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
#if 0 // __CUDA_ARCH__ >= 300
CV_UNUSED(skeys);
CV_UNUSED(svals);
CV_UNUSED(tid);
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
#else
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (tid < N / 2)
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
};
template <unsigned int N> struct GenericOptimized32
{
enum { M = N / 32 };
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
const unsigned int laneId = Warp::laneId();
#if 0 // __CUDA_ARCH__ >= 300
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#else
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (laneId < 16)
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
__syncthreads();
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#endif
__syncthreads();
loadFromSmem(skeys, key, tid);
if (tid < 32)
{
#if 0 // __CUDA_ARCH__ >= 300
loadFromSmem(svals, val, tid);
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
#else
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
}
};
template <bool val, class T1, class T2> struct StaticIf;
template <class T1, class T2> struct StaticIf<true, T1, T2>
{
typedef T1 type;
};
template <class T1, class T2> struct StaticIf<false, T1, T2>
{
typedef T2 type;
};
template <unsigned int N> struct IsPowerOf2
{
enum { value = ((N != 0) && !(N & (N - 1))) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
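It is driven the same way as the plain reducer, but each key drags its value along. A hedged block-wide argmin sketch; LessOp and blockArgMin are illustrative names, with N equal to the block size:
struct LessOp
{
    __device__ __forceinline__ bool operator ()(float a, float b) const { return a < b; }
};
template <unsigned int N>
__device__ void blockArgMin(float& key, int& idx, unsigned int tid)
{
    __shared__ float skeys[N];
    __shared__ int   svals[N];
    cv::cuda::device::reduce_key_val_detail::Dispatcher<N>::reductor
        ::template reduce<volatile float*, float&, volatile int*, int&, const LessOp&>(
            skeys, key, svals, idx, tid, LessOp());
    // thread 0 now holds the smallest key and its index in (key, idx)
}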


@@ -0,0 +1,392 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace transform_detail
{
//! Read Write Traits
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
//! Transform kernels
template <int shift> struct OpUnroller;
template <> struct OpUnroller<1>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
};
template <> struct OpUnroller<2>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
};
template <> struct OpUnroller<3>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
template <> struct OpUnroller<4>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <> struct OpUnroller<8>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src.a7);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src1.a0, src2.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src1.a1, src2.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src1.a2, src2.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src1.a3, src2.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src1.a4, src2.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src1.a5, src2.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src1.a6, src2.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src1.a7, src2.a7);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
typedef TransformFunctorTraits<UnOp> ft;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src_.cols)
{
const read_type src_n_el = ((const read_type*)src)[x];
OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
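// Editorial note (not part of the original header): in the "smart" kernel above each
// thread handles ft::smart_shift consecutive elements through a single vector-typed
// load/store (read_type/write_type); the scalar loop in the else-branch mops up the
// partially filled tail of each row.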
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
const Mask mask, const BinOp op)
{
typedef TransformFunctorTraits<BinOp> ft;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
{
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
const Mask mask, const BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
const T1 src1_data = src1.ptr(y)[x];
const T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
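// Editorial sketch (not part of the original header): a plausible entry point that
// drives the dispatcher; treat the signature as an illustration rather than a
// verbatim copy of the library code.
// template <typename T, typename D, typename UnOp, typename Mask>
// static void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
// {
//     typedef TransformFunctorTraits<UnOp> ft;
//     TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
// }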
} // namespace transform_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP

View File

@ -0,0 +1,191 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
#include "../common.hpp"
#include "../vec_traits.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace type_traits_detail
{
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
template <typename T> struct IsSignedIntegral { enum {value = 0}; };
template <> struct IsSignedIntegral<schar> { enum {value = 1}; };
template <> struct IsSignedIntegral<char1> { enum {value = 1}; };
template <> struct IsSignedIntegral<short> { enum {value = 1}; };
template <> struct IsSignedIntegral<short1> { enum {value = 1}; };
template <> struct IsSignedIntegral<int> { enum {value = 1}; };
template <> struct IsSignedIntegral<int1> { enum {value = 1}; };
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
template <typename T> struct IsIntegral { enum {value = IsSignedIntegral<T>::value || IsUnsignedIntegral<T>::value}; };
template <> struct IsIntegral<char> { enum {value = 1}; };
template <> struct IsIntegral<bool> { enum {value = 1}; };
template <typename T> struct IsFloat { enum {value = 0}; };
template <> struct IsFloat<float> { enum {value = 1}; };
template <> struct IsFloat<double> { enum {value = 1}; };
template <typename T> struct IsVec { enum {value = 0}; };
template <> struct IsVec<uchar1> { enum {value = 1}; };
template <> struct IsVec<uchar2> { enum {value = 1}; };
template <> struct IsVec<uchar3> { enum {value = 1}; };
template <> struct IsVec<uchar4> { enum {value = 1}; };
template <> struct IsVec<uchar8> { enum {value = 1}; };
template <> struct IsVec<char1> { enum {value = 1}; };
template <> struct IsVec<char2> { enum {value = 1}; };
template <> struct IsVec<char3> { enum {value = 1}; };
template <> struct IsVec<char4> { enum {value = 1}; };
template <> struct IsVec<char8> { enum {value = 1}; };
template <> struct IsVec<ushort1> { enum {value = 1}; };
template <> struct IsVec<ushort2> { enum {value = 1}; };
template <> struct IsVec<ushort3> { enum {value = 1}; };
template <> struct IsVec<ushort4> { enum {value = 1}; };
template <> struct IsVec<ushort8> { enum {value = 1}; };
template <> struct IsVec<short1> { enum {value = 1}; };
template <> struct IsVec<short2> { enum {value = 1}; };
template <> struct IsVec<short3> { enum {value = 1}; };
template <> struct IsVec<short4> { enum {value = 1}; };
template <> struct IsVec<short8> { enum {value = 1}; };
template <> struct IsVec<uint1> { enum {value = 1}; };
template <> struct IsVec<uint2> { enum {value = 1}; };
template <> struct IsVec<uint3> { enum {value = 1}; };
template <> struct IsVec<uint4> { enum {value = 1}; };
template <> struct IsVec<uint8> { enum {value = 1}; };
template <> struct IsVec<int1> { enum {value = 1}; };
template <> struct IsVec<int2> { enum {value = 1}; };
template <> struct IsVec<int3> { enum {value = 1}; };
template <> struct IsVec<int4> { enum {value = 1}; };
template <> struct IsVec<int8> { enum {value = 1}; };
template <> struct IsVec<float1> { enum {value = 1}; };
template <> struct IsVec<float2> { enum {value = 1}; };
template <> struct IsVec<float3> { enum {value = 1}; };
template <> struct IsVec<float4> { enum {value = 1}; };
template <> struct IsVec<float8> { enum {value = 1}; };
template <> struct IsVec<double1> { enum {value = 1}; };
template <> struct IsVec<double2> { enum {value = 1}; };
template <> struct IsVec<double3> { enum {value = 1}; };
template <> struct IsVec<double4> { enum {value = 1}; };
template <> struct IsVec<double8> { enum {value = 1}; };
template <class U> struct AddParameterType { typedef const U& type; };
template <class U> struct AddParameterType<U&> { typedef U& type; };
template <> struct AddParameterType<void> { typedef void type; };
template <class U> struct ReferenceTraits
{
enum { value = false };
typedef U type;
};
template <class U> struct ReferenceTraits<U&>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits
{
enum { value = false };
typedef void type;
};
template <class U> struct PointerTraits<U*>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits<U*&>
{
enum { value = true };
typedef U type;
};
template <class U> struct UnConst
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnConst<const U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnConst<const U&>
{
typedef U& type;
enum { value = 1 };
};
template <class U> struct UnVolatile
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnVolatile<volatile U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnVolatile<volatile U&>
{
typedef U& type;
enum { value = 1 };
};
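// Editorial sketch (not in the original header): how these traits compose; the
// typedef names below are hypothetical.
//   typedef Select<IsIntegral<int>::value, long long, double>::type wide_t;  // long long
//   typedef UnConst<const float>::type plain_t;                              // float
//   typedef PointerTraits<int*>::type pointee_t;                             // int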
} // namespace type_traits_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP

View File

@ -0,0 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
#include "../datamov_utils.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace vec_distance_detail
{
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
template <typename Dist, typename T1, typename T2>
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
{
if (ind < len)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, ind, val2);
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
}
}
template <typename Dist, typename T1, typename T2>
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, 0, val2);
vecGlob += THREAD_DIM;
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
}
};
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
{
}
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
{
}
};
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
}
};
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
}
};
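// Editorial note (not part of the original header): the recursion unrolls
// MAX_LEN / THREAD_DIM reduce steps at compile time. When the runtime length equals
// MAX_LEN the calcWithoutCheck path drops the bounds test entirely; the THREAD_DIM
// cooperating threads walk the global vector with a stride of THREAD_DIM elements.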
} // namespace vec_distance_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP

View File

@ -0,0 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<class T> struct DynamicSharedMem
{
__device__ __forceinline__ operator T*()
{
extern __shared__ int __smem[];
return (T*)__smem;
}
__device__ __forceinline__ operator const T*() const
{
extern __shared__ int __smem[];
return (T*)__smem;
}
};
// specialize for double to avoid unaligned memory access compile errors
template<> struct DynamicSharedMem<double>
{
__device__ __forceinline__ operator double*()
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
__device__ __forceinline__ operator const double*() const
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
};
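// Editorial sketch (hypothetical kernel, not part of the original header): the helper
// is declared inside a kernel and the buffer is sized at launch time.
// __global__ void scaleKernel(float* data)
// {
//     DynamicSharedMem<float> smem;
//     float* buf = smem;                 // aliases the extern __shared__ block
//     buf[threadIdx.x] = data[threadIdx.x] * 2.f;
// }
// // launch: scaleKernel<<<grid, block, block.x * sizeof(float), stream>>>(d_data);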
}}}
//! @endcond
#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP

View File

@ -0,0 +1,269 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_
#include "common.hpp"
#include "warp_reduce.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Emulation
{
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just a compilation stub
return 0;
#else
return __syncthreads_or(pred);
#endif
}
template<int CTA_SIZE>
static __forceinline__ __device__ int Ballot(int predicate)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
return __ballot(predicate);
#else
__shared__ volatile int cta_buffer[CTA_SIZE];
int tid = threadIdx.x;
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
return warp_reduce(cta_buffer);
#endif
}
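// Editorial note (not in the original header): the fallback writes each thread's
// unique lane bit into shared memory and reduces the buffer; since the bits are
// disjoint, the additive warp_reduce is equivalent to an OR, emulating __ballot
// for a single warp.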
struct smem
{
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
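// Editorial note (not in the original header): the word is split into a value in the
// low 27 bits (TAG_MASK) and a 5-bit thread tag in the high bits; without hardware
// shared-memory atomics, a thread keeps rewriting its tagged result until it reads
// its own tag back, serializing conflicting writers.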
template<typename T>
static __device__ __forceinline__ T atomicInc(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + 1);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - 1;
#else
return ::atomicInc(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicAdd(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + val);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - val;
#else
return ::atomicAdd(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicMin(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count = ::min(*address, val);
do
{
*address = count;
} while (*address > count);
return count;
#else
return ::atomicMin(address, val);
#endif
}
}; // struct smem
struct glob
{
static __device__ __forceinline__ int atomicAdd(int* address, int val)
{
return ::atomicAdd(address, val);
}
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
return ::atomicAdd(address, val);
}
static __device__ __forceinline__ float atomicAdd(float* address, float val)
{
#if __CUDA_ARCH__ >= 200
return ::atomicAdd(address, val);
#else
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(val + __int_as_float(assumed)));
} while (assumed != old);
return __int_as_float(old);
#endif
}
static __device__ __forceinline__ double atomicAdd(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
#else
CV_UNUSED(address);
CV_UNUSED(val);
return 0.0;
#endif
}
static __device__ __forceinline__ int atomicMin(int* address, int val)
{
return ::atomicMin(address, val);
}
static __device__ __forceinline__ float atomicMin(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fminf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
CV_UNUSED(address);
CV_UNUSED(val);
return 0.0f;
#endif
}
static __device__ __forceinline__ double atomicMin(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
CV_UNUSED(address);
CV_UNUSED(val);
return 0.0;
#endif
}
static __device__ __forceinline__ int atomicMax(int* address, int val)
{
return ::atomicMax(address, val);
}
static __device__ __forceinline__ float atomicMax(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
CV_UNUSED(address);
CV_UNUSED(val);
return 0.0f;
#endif
}
static __device__ __forceinline__ double atomicMax(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
CV_UNUSED(address);
CV_UNUSED(val);
return 0.0;
#endif
}
};
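// Editorial sketch (hypothetical pointer/value names, not part of the original
// header): the CAS loops above are the standard compare-and-swap emulation of
// floating-point atomics, used exactly like the hardware intrinsics:
//   float old = Emulation::glob::atomicAdd(d_sum, partial);
//   double m  = Emulation::glob::atomicMin(d_best, candidate);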
}; //struct Emulation
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_EMULATION_HPP_ */

View File

@ -0,0 +1,286 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_FILTERS_HPP
#define OPENCV_CUDA_FILTERS_HPP
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
#include "type_traits.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename Ptr2D> struct PointFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
CV_UNUSED(fx);
CV_UNUSED(fy);
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
return src(__float2int_rz(y), __float2int_rz(x));
}
Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
CV_UNUSED(fx);
CV_UNUSED(fy);
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(x);
const int y1 = __float2int_rd(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
elem_type src_reg = src(y1, x1);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = src(y1, x2);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = src(y2, x1);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = src(y2, x2);
out = out + src_reg * ((x - x1) * (y - y1));
return saturate_cast<elem_type>(out);
}
Ptr2D src;
};
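// Editorial note (not in the original header): the four LinearFilter weights are the
// standard bilinear coefficients and sum to 1 for any (x, y) in the cell, since
// x2 = x1 + 1 and y2 = y1 + 1:
//   (x2-x)(y2-y) + (x-x1)(y2-y) + (x2-x)(y-y1) + (x-x1)(y-y1)
//     = ((x2-x) + (x-x1)) * ((y2-y) + (y-y1)) = 1 * 1 = 1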
template <typename Ptr2D> struct CubicFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
CV_UNUSED(fx);
CV_UNUSED(fy);
}
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
float x = fabsf(x_);
if (x <= 1.0f)
{
return x * x * (1.5f * x - 2.5f) + 1.0f;
}
else if (x < 2.0f)
{
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
}
else
{
return 0.0f;
}
}
__device__ elem_type operator ()(float y, float x) const
{
const float xmin = ::ceilf(x - 2.0f);
const float xmax = ::floorf(x + 2.0f);
const float ymin = ::ceilf(y - 2.0f);
const float ymax = ::floorf(y + 2.0f);
work_type sum = VecTraits<work_type>::all(0);
float wsum = 0.0f;
for (float cy = ymin; cy <= ymax; cy += 1.0f)
{
for (float cx = xmin; cx <= xmax; cx += 1.0f)
{
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
wsum += w;
}
}
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
return saturate_cast<elem_type>(res);
}
Ptr2D src;
};
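// Editorial note (not in the original header): bicubicCoeff is the Keys cubic
// convolution kernel with a = -0.5 (Catmull-Rom); dividing by wsum renormalizes the
// truncated 4x4 footprint so the weights still sum to 1.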
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for(int dy = sy1; dy < sy2; ++dy)
for(int dx = sx1; dx < sx2; ++dx)
{
out = out + src(dy, dx) * scale;
}
return saturate_cast<elem_type>(out);
}
Ptr2D src;
float scale_x, scale_y, scale;
};
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
if (sx1 > fsx1)
out = out + src(dy, (sx1 - 1)) * ((sx1 - fsx1) * scale);
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 - sx2) * scale);
}
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src((sy1 - 1), dx) * ((sy1 - fsy1) * scale);
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 - sy2) * scale);
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src((sy1 - 1), (sx1 - 1)) * ((sy1 - fsy1) * (sx1 - fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src((sy1 - 1), sx2) * ((sy1 - fsy1) * (fsx2 - sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 - sy2) * (fsx2 - sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 - sy2) * (sx1 - fsx1) * scale);
return saturate_cast<elem_type>(out);
}
Ptr2D src;
float scale_x, scale_y;
int width, height;
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_FILTERS_HPP

View File

@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
#include <cstdio>
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<class Func>
void printFuncAttrib(Func& func)
{
cudaFuncAttributes attrs;
cudaFuncGetAttributes(&attrs, func);
printf("=== Function stats ===\n");
printf("Name: \n");
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
printf("numRegs = %d\n", attrs.numRegs);
printf("ptxVersion = %d\n", attrs.ptxVersion);
printf("binaryVersion = %d\n", attrs.binaryVersion);
printf("\n");
fflush(stdout);
}
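// Editorial sketch (hypothetical kernel name, not part of the original header):
// __global__ void myKernel(const float* in, float* out) { /* ... */ }
// printFuncAttrib(myKernel);   // dumps shared/const/local sizes, register count, PTX version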
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */

View File

@ -0,0 +1,811 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
#define OPENCV_CUDA_FUNCTIONAL_HPP
#include <functional>
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "type_traits.hpp"
#include "device_functions.h"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Function Objects
#ifdef CV_CXX11
template<typename Argument, typename Result> struct unary_function
{
typedef Argument argument_type;
typedef Result result_type;
};
template<typename Argument1, typename Argument2, typename Result> struct binary_function
{
typedef Argument1 first_argument_type;
typedef Argument2 second_argument_type;
typedef Result result_type;
};
#else
template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
#endif
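// Editorial note (not part of the original header): the local shims above are needed
// because std::unary_function and std::binary_function were deprecated in C++11 and
// removed in C++17, while the functors below still rely on the nested typedefs.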
// Arithmetic Operations
template <typename T> struct plus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a + b;
}
__host__ __device__ __forceinline__ plus() {}
__host__ __device__ __forceinline__ plus(const plus&) {}
};
template <typename T> struct minus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a - b;
}
__host__ __device__ __forceinline__ minus() {}
__host__ __device__ __forceinline__ minus(const minus&) {}
};
template <typename T> struct multiplies : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a * b;
}
__host__ __device__ __forceinline__ multiplies() {}
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
};
template <typename T> struct divides : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a / b;
}
__host__ __device__ __forceinline__ divides() {}
__host__ __device__ __forceinline__ divides(const divides&) {}
};
template <typename T> struct modulus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a % b;
}
__host__ __device__ __forceinline__ modulus() {}
__host__ __device__ __forceinline__ modulus(const modulus&) {}
};
template <typename T> struct negate : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
{
return -a;
}
__host__ __device__ __forceinline__ negate() {}
__host__ __device__ __forceinline__ negate(const negate&) {}
};
// Comparison Operations
template <typename T> struct equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a == b;
}
__host__ __device__ __forceinline__ equal_to() {}
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
};
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a != b;
}
__host__ __device__ __forceinline__ not_equal_to() {}
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
};
template <typename T> struct greater : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a > b;
}
__host__ __device__ __forceinline__ greater() {}
__host__ __device__ __forceinline__ greater(const greater&) {}
};
template <typename T> struct less : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a < b;
}
__host__ __device__ __forceinline__ less() {}
__host__ __device__ __forceinline__ less(const less&) {}
};
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a >= b;
}
__host__ __device__ __forceinline__ greater_equal() {}
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
};
template <typename T> struct less_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a <= b;
}
__host__ __device__ __forceinline__ less_equal() {}
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
};
// Logical Operations
template <typename T> struct logical_and : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a && b;
}
__host__ __device__ __forceinline__ logical_and() {}
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
};
template <typename T> struct logical_or : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a || b;
}
__host__ __device__ __forceinline__ logical_or() {}
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
};
template <typename T> struct logical_not : unary_function<T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
{
return !a;
}
__host__ __device__ __forceinline__ logical_not() {}
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
};
// Bitwise Operations
template <typename T> struct bit_and : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a & b;
}
__host__ __device__ __forceinline__ bit_and() {}
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
};
template <typename T> struct bit_or : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a | b;
}
__host__ __device__ __forceinline__ bit_or() {}
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
};
template <typename T> struct bit_xor : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a ^ b;
}
__host__ __device__ __forceinline__ bit_xor() {}
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
};
template <typename T> struct bit_not : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
{
return ~v;
}
__host__ __device__ __forceinline__ bit_not() {}
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
};
// Generalized Identity Operations
template <typename T> struct identity : unary_function<T, T>
{
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
{
return x;
}
__host__ __device__ __forceinline__ identity() {}
__host__ __device__ __forceinline__ identity(const identity&) {}
};
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return lhs;
}
__host__ __device__ __forceinline__ project1st() {}
__host__ __device__ __forceinline__ project1st(const project1st&) {}
};
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return rhs;
}
__host__ __device__ __forceinline__ project2nd() {}
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
};
// Min/Max Operations
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
__host__ __device__ __forceinline__ name() {}\
__host__ __device__ __forceinline__ name(const name&) {}\
};
template <typename T> struct maximum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return max(lhs, rhs);
}
__host__ __device__ __forceinline__ maximum() {}
__host__ __device__ __forceinline__ maximum(const maximum&) {}
};
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
template <typename T> struct minimum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return min(lhs, rhs);
}
__host__ __device__ __forceinline__ minimum() {}
__host__ __device__ __forceinline__ minimum(const minimum&) {}
};
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
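// Editorial note (not in the original header): the specializations generated above
// route each primitive type to its device intrinsic, so maximum<float>()(a, b)
// compiles to ::fmax(a, b), while the unspecialized template keeps working for
// other types.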
// Math functions
template <typename T> struct abs_func : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
{
return abs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
{
__device__ __forceinline__ unsigned char operator ()(unsigned char x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
{
__device__ __forceinline__ signed char operator ()(signed char x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<char> : unary_function<char, char>
{
__device__ __forceinline__ char operator ()(char x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
{
__device__ __forceinline__ unsigned short operator ()(unsigned short x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<short> : unary_function<short, short>
{
__device__ __forceinline__ short operator ()(short x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
{
__device__ __forceinline__ unsigned int operator ()(unsigned int x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<int> : unary_function<int, int>
{
__device__ __forceinline__ int operator ()(int x) const
{
return ::abs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<float> : unary_function<float, float>
{
__device__ __forceinline__ float operator ()(float x) const
{
return ::fabsf(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<double> : unary_function<double, double>
{
__device__ __forceinline__ double operator ()(double x) const
{
return ::fabs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : unary_function<T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
{ \
return func ## f(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
__device__ __forceinline__ double operator ()(double v) const \
{ \
return func(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
{ \
return func ## f(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
__device__ __forceinline__ double operator ()(double v1, double v2) const \
{ \
return func(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
{
return src1 * src1 + src2 * src2;
}
__host__ __device__ __forceinline__ hypot_sqr_func() {}
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
};
// Saturate Cast Functor
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
{
return saturate_cast<D>(v);
}
__host__ __device__ __forceinline__ saturate_cast_func() {}
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
};
// Threshold Functors
template <typename T> struct thresh_binary_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * maxVal;
}
__host__ __device__ __forceinline__ thresh_binary_func() {}
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
T thresh;
T maxVal;
};
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * maxVal;
}
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
T thresh;
T maxVal;
};
template <typename T> struct thresh_trunc_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return minimum<T>()(src, thresh);
}
__host__ __device__ __forceinline__ thresh_trunc_func() {}
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
: thresh(other.thresh) {}
T thresh;
};
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * src;
}
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
: thresh(other.thresh) {}
T thresh;
};
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * src;
}
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
: thresh(other.thresh) {}
T thresh;
};
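// --- Editor's illustrative sketch (not part of the original header). Assuming
// the threshold functors above are applied element-wise by a caller-supplied
// kernel, a minimal (hypothetical) use of thresh_binary_func might look like:
//
//   template <typename T>
//   __global__ void thresholdKernel(const T* src, T* dst, int n, T thresh, T maxVal)
//   {
//       const int i = blockIdx.x * blockDim.x + threadIdx.x;
//       thresh_binary_func<T> op(thresh, maxVal);
//       if (i < n)
//           dst[i] = op(src[i]); // (src[i] > thresh) ? maxVal : 0
//   }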
// Function Object Adaptors
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
{
return !pred(x);
}
__host__ __device__ __forceinline__ unary_negate() {}
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
Predicate pred;
};
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
{
return unary_negate<Predicate>(pred);
}
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
{
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
{
return !pred(x,y);
}
__host__ __device__ __forceinline__ binary_negate() {}
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
Predicate pred;
};
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
{
return binary_negate<BinaryPredicate>(pred);
}
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
{
return op(arg1, a);
}
__host__ __device__ __forceinline__ binder1st() {}
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
Op op;
typename Op::first_argument_type arg1;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
{
return binder1st<Op>(op, typename Op::first_argument_type(x));
}
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
{
return op(a, arg2);
}
__host__ __device__ __forceinline__ binder2nd() {}
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
Op op;
typename Op::second_argument_type arg2;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
{
return binder2nd<Op>(op, typename Op::second_argument_type(x));
}
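// --- Editor's illustrative sketch (not part of the original header). bind2nd
// fixes the second argument of a binary functor, yielding a unary one; for
// example, clamping values to an upper bound with the minimum<> functor used
// earlier in this header:
//
//   minimum<int> minOp;                                 // binary: min(a, b)
//   binder2nd< minimum<int> > clampTo255 = bind2nd(minOp, 255);
//   int y = clampTo255(300);                            // y == 255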
// Functor Traits
template <typename F> struct IsUnaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T, typename D> static Yes check(unary_function<T, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
template <typename F> struct IsBinaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
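// --- Editor's note (not part of the original header). Both traits rely on the
// classic sizeof/overload trick: check(makeF()) selects the templated overload
// only when F derives from unary_function/binary_function. Assuming the
// functors defined earlier in this header:
//
//   IsUnaryFunction< abs_func<int> >::value  == 1  // derives from unary_function
//   IsUnaryFunction< plus<int> >::value      == 0  // plus is a binary_function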
namespace functional_detail
{
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
template <typename T, typename D> struct DefaultUnaryShift
{
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
{
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
};
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
template <typename Func> struct ShiftDispatcher<Func, true>
{
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
};
template <typename Func> struct ShiftDispatcher<Func, false>
{
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
};
}
template <typename Func> struct DefaultTransformShift
{
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
};
template <typename Func> struct DefaultTransformFunctorTraits
{
enum { simple_block_dim_x = 16 };
enum { simple_block_dim_y = 16 };
enum { smart_block_dim_x = 16 };
enum { smart_block_dim_y = 16 };
enum { smart_shift = DefaultTransformShift<Func>::shift };
};
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_FUNCTIONAL_HPP

View File

@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_LIMITS_HPP
#define OPENCV_CUDA_LIMITS_HPP
#include <limits.h>
#include <float.h>
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <class T> struct numeric_limits;
template <> struct numeric_limits<bool>
{
__device__ __forceinline__ static bool min() { return false; }
__device__ __forceinline__ static bool max() { return true; }
static const bool is_signed = false;
};
template <> struct numeric_limits<signed char>
{
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned char>
{
__device__ __forceinline__ static unsigned char min() { return 0; }
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<short>
{
__device__ __forceinline__ static short min() { return SHRT_MIN; }
__device__ __forceinline__ static short max() { return SHRT_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned short>
{
__device__ __forceinline__ static unsigned short min() { return 0; }
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<int>
{
__device__ __forceinline__ static int min() { return INT_MIN; }
__device__ __forceinline__ static int max() { return INT_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned int>
{
__device__ __forceinline__ static unsigned int min() { return 0; }
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<float>
{
__device__ __forceinline__ static float min() { return FLT_MIN; }
__device__ __forceinline__ static float max() { return FLT_MAX; }
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
static const bool is_signed = true;
};
template <> struct numeric_limits<double>
{
__device__ __forceinline__ static double min() { return DBL_MIN; }
__device__ __forceinline__ static double max() { return DBL_MAX; }
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
static const bool is_signed = true;
};
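// --- Editor's illustrative sketch (not part of the original header). These
// specializations mirror std::numeric_limits for device code, e.g. guarding a
// division inside a kernel:
//
//   __device__ float safeInv(float x)
//   {
//       return (::fabsf(x) > numeric_limits<float>::epsilon()) ? 1.0f / x : 0.0f;
//   }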
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_LIMITS_HPP

View File

@ -0,0 +1,209 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_REDUCE_HPP
#define OPENCV_CUDA_REDUCE_HPP
#ifndef THRUST_DEBUG // eliminate -Wundef warning
#define THRUST_DEBUG 0
#endif
#include <thrust/tuple.h>
#include "detail/reduce.hpp"
#include "detail/reduce_key_val.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <int N, typename T, class Op>
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
{
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
}
template <int N,
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
reduce_detail::Dispatcher<N>::reductor::template reduce<
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
}
template <unsigned int N, typename K, typename V, class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
template <unsigned int N,
typename K,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid, const Cmp& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
const Cmp&>(skeys, key, svals, val, tid, cmp);
}
template <unsigned int N,
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
>(skeys, key, svals, val, tid, cmp);
}
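// --- Editor's illustrative sketch (not part of the original header). A typical
// (hypothetical) block-wide sum with the single-value overload, assuming the
// plus<> functor from this module and N == blockDim.x, a power of two with one
// shared-memory slot per thread; by convention thread 0 holds the total afterwards:
//
//   template <int N>
//   __device__ float blockSum(float partial, volatile float* smem)
//   {
//       reduce<N>(smem, partial, threadIdx.x, plus<float>());
//       return partial;
//   }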
// smem_tuple
template <typename T0>
__device__ __forceinline__
thrust::tuple<volatile T0*>
smem_tuple(T0* t0)
{
return thrust::make_tuple((volatile T0*) t0);
}
template <typename T0, typename T1>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*>
smem_tuple(T0* t0, T1* t1)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
}
template <typename T0, typename T1, typename T2>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
smem_tuple(T0* t0, T1* t1, T2* t2)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
}
template <typename T0, typename T1, typename T2, typename T3>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
}
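// --- Editor's illustrative sketch (not part of the original header). smem_tuple
// packages several shared-memory arrays so one reduce<> call can fold multiple
// values at once, e.g. a simultaneous sum and maximum (plus<> and maximum<>
// assumed from functional.hpp; s and m are this thread's partials):
//
//   __shared__ float ssum[256];
//   __shared__ float smax[256];
//   reduce<256>(smem_tuple(ssum, smax),
//               thrust::tie(s, m),
//               threadIdx.x,
//               thrust::make_tuple(plus<float>(), maximum<float>()));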
}}}
//! @endcond
#endif // OPENCV_CUDA_REDUCE_HPP

View File

@ -0,0 +1,292 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
#define OPENCV_CUDA_SATURATE_CAST_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
uint res = 0;
int vi = v;
asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
uint res = 0;
asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
uint res = 0;
asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
uint res = 0;
asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
uint res = 0;
asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
uint res = 0;
asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
uint res = 0;
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
return res;
#else
return saturate_cast<uchar>((float)v);
#endif
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
uint res = 0;
uint vi = v;
asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
uint res = 0;
asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
uint res = 0;
asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
uint res = 0;
asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
uint res = 0;
asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
uint res = 0;
asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
uint res = 0;
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
return res;
#else
return saturate_cast<schar>((float)v);
#endif
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
ushort res = 0;
int vi = v;
asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
ushort res = 0;
asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
ushort res = 0;
asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
ushort res = 0;
asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
ushort res = 0;
asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
ushort res = 0;
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
return res;
#else
return saturate_cast<ushort>((float)v);
#endif
}
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
short res = 0;
asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
short res = 0;
asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
short res = 0;
asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
short res = 0;
asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
short res = 0;
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
return res;
#else
return saturate_cast<short>((float)v);
#endif
}
template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
{
int res = 0;
asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
return __float2int_rn(v);
}
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2int_rn(v);
#else
return saturate_cast<int>((float)v);
#endif
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
{
uint res = 0;
int vi = v;
asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
{
uint res = 0;
asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
{
uint res = 0;
asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
return __float2uint_rn(v);
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2uint_rn(v);
#else
return saturate_cast<uint>((float)v);
#endif
}
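// --- Editor's worked examples (not part of the original header), illustrating
// the clamping and rounding behaviour of the specializations above:
//
//   saturate_cast<uchar>(300)   == 255  // cvt.sat clamps to [0, 255]
//   saturate_cast<uchar>(-5)    == 0
//   saturate_cast<schar>(200)   == 127  // clamps to [-128, 127]
//   saturate_cast<uchar>(2.5f)  == 2    // .rni rounds to nearest even
//   saturate_cast<uchar>(3.5f)  == 4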
}}}
//! @endcond
#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */

View File

@ -0,0 +1,258 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_SCAN_HPP
#define OPENCV_CUDA_SCAN_HPP
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include "opencv2/core/cuda/warp.hpp"
#include "opencv2/core/cuda/warp_shuffle.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
template <ScanKind Kind, typename T, typename F> struct WarpScan
{
__device__ __forceinline__ WarpScan() {}
__device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = idx & 31;
F op;
if (lane >=  1) ptr[idx] = op(ptr[idx -  1], ptr[idx]);
if (lane >=  2) ptr[idx] = op(ptr[idx -  2], ptr[idx]);
if (lane >=  4) ptr[idx] = op(ptr[idx -  4], ptr[idx]);
if (lane >=  8) ptr[idx] = op(ptr[idx -  8], ptr[idx]);
if (lane >= 16) ptr[idx] = op(ptr[idx - 16], ptr[idx]);
if (Kind == INCLUSIVE)
return ptr[idx];
else
return (lane > 0) ? ptr[idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return tid;
}
__device__ __forceinline__ void init(volatile T *ptr){}
static const int warp_offset = 0;
typedef WarpScan<INCLUSIVE, T, F> merge;
};
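// --- Editor's note (not part of the original header). WarpScan is a classic
// Hillis-Steele scan over one 32-lane warp: after the five conditional steps,
// ptr[idx] holds the inclusive prefix over lanes 0..lane. For example, with
// F = plus<T> and every lane holding 1, lane k ends up with k + 1 (inclusive)
// or k (exclusive).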
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
{
__device__ __forceinline__ WarpScanNoComp() {}
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = threadIdx.x & 31;
F op;
ptr[idx] = op(ptr[idx -  1], ptr[idx]);
ptr[idx] = op(ptr[idx -  2], ptr[idx]);
ptr[idx] = op(ptr[idx -  4], ptr[idx]);
ptr[idx] = op(ptr[idx -  8], ptr[idx]);
ptr[idx] = op(ptr[idx - 16], ptr[idx]);
if (Kind == INCLUSIVE)
return ptr[idx];
else
return (lane > 0) ? ptr[idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
}
__device__ __forceinline__ void init(volatile T *ptr)
{
ptr[threadIdx.x] = 0;
}
static const int warp_smem_stride = 32 + 16 + 1;
static const int warp_offset = 16;
static const int warp_log = 5;
static const int warp_mask = 31;
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
};
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
{
__device__ __forceinline__ BlockScan() {}
__device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
__device__ __forceinline__ T operator()(volatile T *ptr)
{
const unsigned int tid = threadIdx.x;
const unsigned int lane = tid & warp_mask;
const unsigned int warp = tid >> warp_log;
Sc scan;
typename Sc::merge merge_scan;
const unsigned int idx = scan.index(tid);
T val = scan(ptr, idx);
__syncthreads();
if (warp == 0)
scan.init(ptr);
__syncthreads();
if (lane == 31)
ptr[scan.warp_offset + warp] = (Kind == INCLUSIVE) ? val : ptr[idx];
__syncthreads();
if (warp == 0)
merge_scan(ptr, idx);
__syncthreads();
if (warp > 0)
val = ptr[scan.warp_offset + warp - 1] + val;
__syncthreads();
ptr[idx] = val;
__syncthreads();
return val;
}
static const int warp_log = 5;
static const int warp_mask = 31;
};
template <typename T>
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
#if __CUDA_ARCH__ >= 300
const unsigned int laneId = cv::cuda::device::Warp::laneId();
// scan using shuffle functions
#pragma unroll
for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
{
const T n = cv::cuda::device::shfl_up(idata, i);
if (laneId >= i)
idata += n;
}
return idata;
#else
unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
s_Data[pos] = 0;
pos += OPENCV_CUDA_WARP_SIZE;
s_Data[pos] = idata;
s_Data[pos] += s_Data[pos - 1];
s_Data[pos] += s_Data[pos - 2];
s_Data[pos] += s_Data[pos - 4];
s_Data[pos] += s_Data[pos - 8];
s_Data[pos] += s_Data[pos - 16];
return s_Data[pos];
#endif
}
template <typename T>
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
{
return warpScanInclusive(idata, s_Data, tid) - idata;
}
template <int tiNumScanThreads, typename T>
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
{
//Bottom-level inclusive warp scan
T warpResult = warpScanInclusive(idata, s_Data, tid);
//Save top elements of each warp for exclusive warp scan
//sync to wait for warp scans to complete (because s_Data is being overwritten)
__syncthreads();
if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
{
s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
}
//wait for warp scans to complete
__syncthreads();
if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
{
//grab top warp elements
T val = s_Data[tid];
//calculate exclusive scan and write back to shared memory
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
}
//return updated warp scans with exclusive scan results
__syncthreads();
return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
}
else
{
return warpScanInclusive(idata, s_Data, tid);
}
}
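// --- Editor's illustrative sketch (not part of the original header). A
// (hypothetical) kernel computing a per-block inclusive prefix sum of flags,
// assuming 256 threads per block; the scratch array is sized 2x the block so
// the pre-Kepler warpScanInclusive fallback has room:
//
//   __global__ void prefixFlags(const int* flags, int* out, int n)
//   {
//       __shared__ int s_data[256 * 2];
//       const int i = blockIdx.x * blockDim.x + threadIdx.x;
//       int v = (i < n) ? flags[i] : 0;
//       int scan = blockScanInclusive<256>(v, s_data, threadIdx.x);
//       if (i < n) out[i] = scan;
//   }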
}}}
//! @endcond
#endif // OPENCV_CUDA_SCAN_HPP

View File

@ -0,0 +1,869 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of NVIDIA Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// 2: SIMD operations on two 16-bit halfwords packed in a 32-bit word
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // sum bits
r = a + b; // actual sum
s = s ^ r; // determine carry-ins for each bit position
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
r = r - s; // subtract out carry-out from low word
#endif
return r;
}
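// --- Editor's worked example (not part of the original header). In the
// fallback path the XOR/mask sequence isolates the carry out of the low
// halfword and subtracts it back out, so it never leaks into the high halfword:
//
//   vadd2(0x00010002, 0x00030004) == 0x00040006   // per-halfword add
//   vadd2(0x0000FFFF, 0x00000001) == 0x00000000   // low halfword wraps,
//                                                 // high halfword unchanged
//
// (the PTX paths use .sat and saturate instead of wrapping)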
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // difference bits
r = a - b; // actual difference
s = s ^ r; // determine borrow-ins for each bit position
s = s & 0x00010000; // borrow to high word
r = r + s; // compensate for borrow from low word
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u, v;
s = a & 0x0000ffff; // extract low halfword
r = b & 0x0000ffff; // extract low halfword
u = ::max(r, s); // maximum of low halfwords
v = ::min(r, s); // minimum of low halfwords
s = a & 0xffff0000; // extract high halfword
r = b & 0xffff0000; // extract high halfword
t = ::max(r, s); // maximum of high halfwords
s = ::min(r, s); // minimum of high halfwords
r = u | t; // maximum of both halfwords
s = v | s; // minimum of both halfwords
r = r - s; // |a - b| = max(a,b) - min(a,b);
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
s = a ^ b;
r = a & b;
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
s = s >> 1;
s = r + s;
return s;
}
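// --- Editor's worked check (not part of the original header) of the HAKMEM
// identity used above: with a = 5 (0b101), b = 3 (0b011),
//   (a & b) + ((a ^ b) >> 1) = 0b001 + (0b110 >> 1) = 1 + 3 = 4 = (5 + 3) / 2.
// The 0xfffefffe mask only keeps the shift from pulling a bit across the
// halfword boundary.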
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
unsigned int s;
s = a ^ b;
r = a | b;
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
s = s >> 1;
r = r - s;
#endif
return r;
}
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vseteq2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetge2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::max(r, s); // maximum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::max(r, s); // maximum of high halfwords
r = t | u; // combine halfword maximums
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::min(r, s); // minimum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::min(r, s); // minimum of high halfwords
r = t | u; // combine halfword minimums
#endif
return r;
}
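// --- Editor's worked example (not part of the original header):
//
//   vmax2(0x00050002, 0x00030004) == 0x00050004   // per-halfword max
//   vmin2(0x00050002, 0x00030004) == 0x00030002   // per-halfword min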
// 4: SIMD operations on four 8-bit bytes packed in a 32-bit word
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ b; // sum bits
r = a & 0x7f7f7f7f; // clear msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // msb sum bits
r = r + t; // add without msbs, record carry-out in msbs
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
#endif /* __CUDA_ARCH__ >= 300 */
return r;
}
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ ~b; // inverted sum bits
r = a | 0x80808080; // set msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // inverted msb sum bits
r = r - t; // subtract w/o msbs, record inverted borrows in msb
r = r ^ s; // combine inverted msb sum bits and borrows
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
s = a ^ b;
r = a & b;
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
s = s >> 1;
s = r + s;
return s;
}
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
unsigned int c;
c = a ^ b;
r = a | b;
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
c = c >> 1;
r = r - c;
#endif
return r;
}
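// Worked per-byte example of the two HAKMEM #23 identities above
// (illustrative only, not part of the original header). For lanes 5 and 2:
// vavg4 : (5 & 2) + ((5 ^ 2) >> 1) = 0 + 3 = 3 (mean 3.5 rounded down)
// vavrg4: (5 | 2) - ((5 ^ 2) >> 1) = 7 - 3 = 4 (mean 3.5 rounded up)
// The 0xfefefefe mask clears each lane's lsb before the shift so no bit
// bleeds into the neighbouring byte.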
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r & ~c; // msb = 1, if r was 0x00
r = c >> 7; // convert to bool
#endif
return r;
}
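// Worked per-byte trace of the Mycroft-style emulation above (illustrative,
// not part of the original header):
// equal bytes: r = 0x00, c = 0x80; r^c = 0x80; c - 1 = 0x7f (msb 0);
// 0x80 & ~0x7f = 0x80, and 0x80 >> 7 = 1
// unequal bytes, e.g. r = 0x26: c = 0xa6; r^c = 0x80; c - 1 = 0xa5 (msb 1);
// 0x80 & ~0xa5 = 0x00, giving 0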
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
unsigned int r, t;
#if __CUDA_ARCH__ >= 300
r = vseteq4(a, b);
t = r << 8; // convert bool
r = t - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
t = a ^ b; // 0x00 if a == b
r = t | 0x80808080; // set msbs, to catch carry out
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
r = t & ~r; // msb = 1, if t was 0x00
t = r >> 7; // build mask
t = r - t; // from
r = t | r; // msbs
#endif
return r;
}
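// The (r << 8) - r step above turns per-byte booleans into per-byte masks
// (illustrative trace, not in the original header): with r = 0x00010001,
// t = 0x01000100 and t - r = 0x00ff00ff, so each 0x01 byte becomes 0xff
// while each 0x00 byte stays 0x00; the borrow never crosses a lane whose
// boolean is 0.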
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
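// Why the average trick works (illustrative note, not part of the original
// header): vavrg4(~a, b) computes (b + ~a + 1) / 2 per byte, and b + ~a + 1
// equals b - a + 256 within a lane, so each result msb is exactly the
// carry-out of the byte-wise subtraction, i.e. 1 iff a <= b. For one byte,
// a = 5, b = 7: 7 + 250 + 1 = 258 = 0x102, halved to 0x81 (msb set);
// for a = 7, b = 5: 5 + 248 + 1 = 254, halved to 0x7f (msb clear).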
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
unsigned int r, s;
#if __CUDA_ARCH__ >= 300
r = vsetge4(a, b);
s = r << 8; // convert bool
r = s - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
r = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
r = r & 0x80808080; // msb = carry-outs
s = r >> 7; // build mask
s = r - s; // from
r = s | r; // msbs
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a ^ b; // difference bits (a ^ b toggles between a and b)
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
r = s ^ r; // select a when b >= a, else select b => min(a,b)
r = s - r; // |a - b| = max(a,b) - min(a,b);
#endif
return r;
}
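// Per-byte trace of the xor-select fallback above (illustrative, not part
// of the original header). With lanes a = 9, b = 3 the mask byte is 0xff:
// s = ((9 ^ 3) & 0xff) ^ 3 = 0x0a ^ 3 = 9 = max(a, b)
// r = 9 ^ (9 ^ 3) = 3 = min(a, b)
// max - min = 6 = |9 - 3|; since max >= min in every lane, the packed
// subtraction never borrows across byte boundaries.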
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a & s; // select a when a >= b
s = b & ~s; // select b when a < b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned maximum
}
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(b, a); // mask = 0xff if b >= a
r = a & s; // select a when b >= a
s = b & ~s; // select b when b < a
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned minimum
}
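// Illustrative usage sketch (an assumption, not part of the original
// header): a kernel that clamps packed 8-bit pixels to [lo, hi], four
// bytes per thread. n counts 32-bit words and lo/hi carry the bound
// replicated in every byte (e.g. 0x10101010); all names are hypothetical.
static __global__ void clampBytesDemo(const unsigned int* src, unsigned int* dst, int n, unsigned int lo, unsigned int hi)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n)
dst[i] = vmin4(vmax4(src[i], lo), hi); // per-byte clamp via SIMD max/min
}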
}}}
//! @endcond
#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP

@@ -0,0 +1,75 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TRANSFORM_HPP
#define OPENCV_CUDA_TRANSFORM_HPP
#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
}
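// Illustrative call site (an assumption, not part of the original header):
// applying a unary invert functor to an 8-bit image without a mask. The
// functor and wrapper names are hypothetical; transform() and WithOutMask
// come from this repository.
struct InvertOpDemo
{
__device__ __forceinline__ uchar operator()(uchar v) const { return (uchar)(255 - v); }
};
static inline void invertDemo(PtrStepSz<uchar> src, PtrStepSz<uchar> dst, cudaStream_t stream)
{
transform(src, dst, InvertOpDemo(), WithOutMask(), stream);
}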
}}}
//! @endcond
#endif // OPENCV_CUDA_TRANSFORM_HPP

@@ -0,0 +1,90 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
#define OPENCV_CUDA_TYPE_TRAITS_HPP
#include "detail/type_traits_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T> struct IsSimpleParameter
{
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
};
template <typename T> struct TypeTraits
{
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
enum { isConst = type_traits_detail::UnConst<T>::value };
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
enum { isArith = isIntegral || isFloat };
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
}}}
//! @endcond
#endif // OPENCV_CUDA_TYPE_TRAITS_HPP

@@ -0,0 +1,230 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_UTILITY_HPP
#define OPENCV_CUDA_UTILITY_HPP
#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct CV_EXPORTS ThrustAllocator
{
typedef uchar value_type;
virtual ~ThrustAllocator();
virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
static ThrustAllocator& getAllocator();
static void setAllocator(ThrustAllocator* allocator);
};
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
///////////////////////////////////////////////////////////////////////////////
// swap
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
const T temp = a;
a = b;
b = temp;
}
///////////////////////////////////////////////////////////////////////////////
// Mask Reader
struct SingleMask
{
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x] != 0;
}
PtrStepb mask;
};
struct SingleMaskChannels
{
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
: mask(mask_), channels(channels_) {}
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
:mask(mask_.mask), channels(mask_.channels){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x / channels] != 0;
}
PtrStepb mask;
int channels;
};
struct MaskCollection
{
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
: maskCollection(maskCollection_) {}
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
__device__ __forceinline__ void next()
{
curMask = *maskCollection++;
}
__device__ __forceinline__ void setMask(int z)
{
curMask = maskCollection[z];
}
__device__ __forceinline__ bool operator()(int y, int x) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
}
const PtrStepb* maskCollection;
PtrStepb curMask;
};
struct WithOutMask
{
__host__ __device__ __forceinline__ WithOutMask(){}
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
__device__ __forceinline__ void next() const
{
}
__device__ __forceinline__ void setMask(int) const
{
}
__device__ __forceinline__ bool operator()(int, int) const
{
return true;
}
__device__ __forceinline__ bool operator()(int, int, int) const
{
return true;
}
static __device__ __forceinline__ bool check(int, int)
{
return true;
}
static __device__ __forceinline__ bool check(int, int, int)
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
// solve 2x2 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
return true;
}
return false;
}
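// Illustrative check (an assumption, not part of the original header):
// solving x + 2y = 5, 3x + 4y = 6. Here det = 1*4 - 3*2 = -2, so the
// branch is taken and x = (-4, 4.5); the kernel name is hypothetical.
static __global__ void solve2x2Demo(float* out)
{
const float A[2][2] = { {1.f, 2.f}, {3.f, 4.f} };
const float b[2] = { 5.f, 6.f };
float x[2];
if (solve2x2(A, b, x))
{
out[0] = x[0]; // -4.0f
out[1] = x[1]; // 4.5f
}
}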
// solve 3x3 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet *
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
x[1] = saturate_cast<T>(invdet *
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
x[2] = saturate_cast<T>(invdet *
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
return true;
}
return false;
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_UTILITY_HPP

@@ -0,0 +1,232 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
#define OPENCV_CUDA_VEC_DISTANCE_HPP
#include "reduce.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T> struct L1Dist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ L1Dist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
template <> struct L1Dist<float>
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += ::fabs(val1 - val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
__device__ __forceinline__ operator float() const
{
return mySum;
}
float mySum;
};
struct L2Dist
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
float mySum;
};
struct HammingDist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ HammingDist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
for (int i = tid; i < len; i += THREAD_DIM)
{
T1 val1;
ForceGlob<T1>::Load(vec1, i, val1);
T2 val2;
ForceGlob<T2>::Load(vec2, i, val2);
dist.reduceIter(val1, val2);
}
dist.reduceAll<THREAD_DIM>(smem, tid);
}
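// Illustrative kernel (an assumption, not part of the original header):
// one block of THREAD_DIM threads computes the L2 distance between two
// float vectors of length len; the kernel name is hypothetical.
template <int THREAD_DIM>
__global__ void l2DistDemo(const float* a, const float* b, int len, float* out)
{
__shared__ float smem[THREAD_DIM];
L2Dist dist;
calcVecDiffGlobal<THREAD_DIM>(a, b, len, dist, smem, threadIdx.x);
if (threadIdx.x == 0)
*out = dist; // operator float() applies the final sqrtf
}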
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
{
vec1 = vec1_;
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
}
const T1* vec1;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
{
if (glob_tid < len)
smem[glob_tid] = vec1[glob_tid];
__syncthreads();
U* vec1ValsPtr = vec1Vals;
#pragma unroll
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
*vec1ValsPtr++ = smem[i];
__syncthreads();
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
}
U vec1Vals[MAX_LEN / THREAD_DIM];
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_DISTANCE_HPP

@@ -0,0 +1,930 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VECMATH_HPP
#define OPENCV_CUDA_VECMATH_HPP
#include "vec_traits.hpp"
#include "saturate_cast.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// saturate_cast
namespace vec_math_detail
{
template <int cn, typename VecD> struct SatCastHelper;
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
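// Illustrative example (an assumption, not part of the original header):
// the overloads above clamp each lane independently, so a helper like
// this (name hypothetical) maps (300.f, -7.f, 42.f) to (255, 0, 42).
__device__ __forceinline__ uchar3 toUchar3Demo(const float3& v)
{
return saturate_cast<uchar3>(v);
}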
// unary operators
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
{ \
return VecTraits<output_type ## 1>::make(op (a.x)); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
{ \
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
{ \
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
{ \
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
}
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
// unary functions
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
{ \
return VecTraits<output_type ## 1>::make(func (a.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
{ \
return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
{ \
return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
{ \
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
}
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
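// After expansion each function above applies per lane; for example
// (illustrative, not part of the original header) the uchar3 overload of
// sqrt behaves like
// float3 r = sqrt(make_uchar3(4, 9, 16)); // r = (2.f, 3.f, 4.f)
// i.e. integer inputs are widened to float through ::sqrtf.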
// binary operators (vec & vec)
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(a.x op b.x); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
}
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
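// Note: the relational (==, !=, <, ...) and logical (&&, ||) instantiations
// above all produce uchar vectors, i.e. per-channel 0/1 masks, while the
// bitwise ops (&, |, ^) keep the input element type.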
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
// binary operators (vec & scalar)
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 1>::make(a.x op s); \
} \
__device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(s op b.x); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
} \
__device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
} \
__device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
} \
__device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
}
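// Expansion sketch (illustrative): CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
// below generates overloads such as
//   __device__ __forceinline__ int2 operator +(const uchar2 & a, int s)
//   { return VecTraits<int2>::make(a.x + s, a.y + s); }
// i.e. mixed vec/scalar arithmetic widens each channel to the scalar's type.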
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
// binary functions (vec & vec)

#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
}
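// Expansion sketch (illustrative): CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
// below generates per-channel overloads such as
//   __device__ __forceinline__ uchar3 max(const uchar3 & a, const uchar3 & b)
//   { return VecTraits<uchar3>::make(::max(a.x, b.x), ::max(a.y, b.y), ::max(a.z, b.z)); }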
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
// binary functions (vec & scalar)
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
}
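// Note: unlike the operator macros above, both arguments are first cast to
// output_type, so e.g. the hypot(uchar-vec, float) instantiation below computes
// ::hypotf on float channels and returns a float vector.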
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VECMATH_HPP

View File

@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
#define OPENCV_CUDA_VEC_TRAITS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<typename T, int N> struct TypeVec;
struct __align__(8) uchar8
{
uchar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(8) char8
{
schar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) ushort8
{
ushort a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) short8
{
short a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) uint8
{
uint a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) int8
{
int a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) float8
{
float a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct double8
{
double a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
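// For illustration: after OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar) below,
// TypeVec<uchar, 3>::vec_type and TypeVec<uchar3, 3>::vec_type are both uchar3.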
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
template<typename T> struct VecTraits;
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
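// For illustration: after OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float) below,
//   VecTraits<float3>::all(1.f)   // == make_float3(1.f, 1.f, 1.f)
//   VecTraits<float3>::cn         // == 3
//   VecTraits<float3>::make(ptr)  // == make_float3(ptr[0], ptr[1], ptr[2])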
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_TRAITS_HPP

View File

@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
#define OPENCV_CUDA_DEVICE_WARP_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Warp
{
enum
{
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
STRIDE = WARP_SIZE
};
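// WARP_SIZE is 1 << 5 == 32 (the warp width on all current NVIDIA GPUs);
// STRIDE lets the helpers below walk a range cooperatively, one element per
// lane per iteration.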
/** \brief Returns the warp lane ID of the calling thread. */
static __device__ __forceinline__ unsigned int laneId()
{
unsigned int ret;
asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
return ret;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
for(It t = beg + laneId(); t < end; t += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = *t;
return out;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = op(*t);
return out;
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
unsigned int lane = laneId();
InIt1 t1 = beg1 + lane;
InIt2 t2 = beg2 + lane;
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
*out = op(*t1, *t2);
return out;
}
template <class T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
{
const unsigned int lane = laneId();
if (lane < 16)
{
T partial = ptr[lane];
ptr[lane] = partial = op(partial, ptr[lane + 16]);
ptr[lane] = partial = op(partial, ptr[lane + 8]);
ptr[lane] = partial = op(partial, ptr[lane + 4]);
ptr[lane] = partial = op(partial, ptr[lane + 2]);
ptr[lane] = partial = op(partial, ptr[lane + 1]);
}
return *ptr;
}
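// Usage sketch (illustrative; the shared buffer and the 'plus' functor are
// assumptions, not part of this header):
//   __shared__ volatile int smem[Warp::WARP_SIZE];
//   smem[Warp::laneId()] = thread_val;
//   int warp_sum = Warp::reduce(smem, plus<int>());
// The unsynchronized strided reads above rely on pre-Volta lockstep warp
// execution; Volta+ independent thread scheduling needs a __syncwarp()-based variant.
// "yota" below is the historical OpenCV spelling of an iota-style fill: the warp
// cooperatively writes value, value+1, value+2, ... into [beg, end).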
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
unsigned int lane = laneId();
value += lane;
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */

View File

@ -0,0 +1,76 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
#define OPENCV_CUDA_WARP_REDUCE_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
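// Usage sketch (illustrative): each thread of a warp stores its value at
// smem[threadIdx.x]; warp_reduce(smem) then returns the warp-wide sum to every
// participating thread (all lanes read the base element ptr[tid - lane]).
// As with Warp::reduce above, the unsynchronized strided reads assume
// pre-Volta lockstep warp execution.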
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */

View File

@ -0,0 +1,162 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
#define OPENCV_CUDA_WARP_SHUFFLE_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
#if __CUDACC_VER_MAJOR__ >= 9
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif
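// CUDA 9 removed the legacy warp shuffle intrinsics in favor of the *_sync
// variants, so these wrappers pass a full-warp mask (0xFFFFFFFFU); this
// assumes all 32 lanes of the warp are active at the call site.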
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl(val, srcLane, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl((int) val, srcLane, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl(lo, srcLane, width);
hi = __shfl(hi, srcLane, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
template <typename T>
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl_down(val, delta, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl_down((int) val, delta, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_down(lo, delta, width);
hi = __shfl_down(hi, delta, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl_up(val, delta, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl_up((int) val, delta, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_up(lo, delta, width);
hi = __shfl_up(hi, delta, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
}}} // namespace cv { namespace cuda { namespace device
# undef __shfl
# undef __shfl_up
# undef __shfl_down
//! @endcond
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP

View File

@ -0,0 +1,86 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
#ifndef __cplusplus
# error cuda_stream_accessor.hpp header must be compiled as C++
#endif
/** @file cuda_stream_accessor.hpp
* This is the only header file that depends on the CUDA Runtime API; all other headers are independent of it.
*/
#include <cuda_runtime.h>
#include "opencv2/core/cuda.hpp"
namespace cv
{
namespace cuda
{
//! @addtogroup cudacore_struct
//! @{
/** @brief Class that enables getting cudaStream_t from cuda::Stream
*/
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
};
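// Usage sketch (illustrative; 'myKernel', 'grid' and 'block' are assumptions):
//   cv::cuda::Stream stream;
//   cudaStream_t raw = cv::cuda::StreamAccessor::getStream(stream);
//   myKernel<<<grid, block, 0, raw>>>(...);  // user code runs on the same stream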
/** @brief Class that enables getting cudaEvent_t from cuda::Event
*/
struct EventAccessor
{
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
};
//! @}
}
}
#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */

View File

@ -0,0 +1,142 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDA_TYPES_HPP
#define OPENCV_CORE_CUDA_TYPES_HPP
#ifndef __cplusplus
# error cuda_types.hpp header must be compiled as C++
#endif
#if defined(__OPENCV_BUILD) && defined(__clang__)
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif
#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
#ifdef __CUDACC__
#define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __CV_CUDA_HOST_DEVICE__
#endif
namespace cv
{
namespace cuda
{
// Simple lightweight structures that encapsulate information about an image on the device.
// They are intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile
template <typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
enum { elem_size = sizeof(elem_type) };
T* data;
__CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
__CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
__CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
__CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
__CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
};
template <typename T> struct PtrSz : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
size_t size;
};
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
size_t step;
__CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
__CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
__CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
template <typename U>
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
int cols;
int rows;
};
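// Note: 'step' is the row stride in bytes (device rows are typically padded
// for alignment), so element access goes through ptr(y). Illustrative:
//   PtrStepSz<float> img = ...;  // e.g. passed from a GpuMat
//   float v = img(y, x);         // == ((float*)((char*)img.data + y * img.step))[x]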
typedef PtrStepSz<unsigned char> PtrStepSzb;
typedef PtrStepSz<float> PtrStepSzf;
typedef PtrStepSz<int> PtrStepSzi;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
}
}
//! @endcond
#endif /* OPENCV_CORE_CUDA_TYPES_HPP */

View File

@ -0,0 +1,247 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#if defined __OPENCV_BUILD
#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#endif
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
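// Illustrative expansion (the dispatch-mode chains come from cv_cpu_helper.h,
// an assumption here): CV_CPU_DISPATCH(foo, (a, b), CV_CPU_DISPATCH_MODES_ALL)
// tries each compiled variant in turn (e.g. opt_AVX2::foo(a, b)) and falls
// back to cpu_baseline::foo(a, b) when no better ISA is available at runtime.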
#if defined CV_ENABLE_INTRINSICS \
&& !defined CV_DISABLE_OPTIMIZATION \
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
#ifdef CV_CPU_COMPILE_SSE2
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
#endif
#ifdef CV_CPU_COMPILE_SSE3
# include <pmmintrin.h>
# define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
# include <tmmintrin.h>
# define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
# include <smmintrin.h>
# define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
# include <nmmintrin.h>
# define CV_SSE4_2 1
#endif
#ifdef CV_CPU_COMPILE_POPCNT
# ifdef _MSC_VER
# include <nmmintrin.h>
# if defined(_M_X64)
# define CV_POPCNT_U64 _mm_popcnt_u64
# endif
# define CV_POPCNT_U32 _mm_popcnt_u32
# else
# include <popcntintrin.h>
# if defined(__x86_64__)
# define CV_POPCNT_U64 __builtin_popcountll
# endif
# define CV_POPCNT_U32 __builtin_popcount
# endif
# define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
# include <immintrin.h>
# define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_FP16
# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
# include <arm_neon.h>
# else
# include <immintrin.h>
# endif
# define CV_FP16 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
# include <immintrin.h>
# define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_AVX_512F
# include <immintrin.h>
# define CV_AVX_512F 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
# include <immintrin.h>
# define CV_AVX512_SKX 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
# define CV_FMA3 1
#endif
#if defined _WIN32 && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#endif
#if defined(__ARM_NEON__) || defined(__aarch64__)
# include <arm_neon.h>
#endif
#ifdef CV_CPU_COMPILE_VSX
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#ifdef CV_CPU_COMPILE_VSX3
# define CV_VSX3 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
struct VZeroUpperGuard {
#ifdef __GNUC__
__attribute__((always_inline))
#endif
inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
};
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
#endif
#ifdef __CV_AVX_GUARD
#define CV_AVX_GUARD __CV_AVX_GUARD
#else
#define CV_AVX_GUARD
#endif
#endif // __OPENCV_BUILD
#if !defined __OPENCV_BUILD /* Compatibility code */ \
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
#elif defined _WIN32 && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
#ifndef CV_MMX
# define CV_MMX 0
#endif
#ifndef CV_SSE
# define CV_SSE 0
#endif
#ifndef CV_SSE2
# define CV_SSE2 0
#endif
#ifndef CV_SSE3
# define CV_SSE3 0
#endif
#ifndef CV_SSSE3
# define CV_SSSE3 0
#endif
#ifndef CV_SSE4_1
# define CV_SSE4_1 0
#endif
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_POPCNT
# define CV_POPCNT 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
#endif
#ifndef CV_FP16
# define CV_FP16 0
#endif
#ifndef CV_AVX2
# define CV_AVX2 0
#endif
#ifndef CV_FMA3
# define CV_FMA3 0
#endif
#ifndef CV_AVX_512F
# define CV_AVX_512F 0
#endif
#ifndef CV_AVX_512BW
# define CV_AVX_512BW 0
#endif
#ifndef CV_AVX_512CD
# define CV_AVX_512CD 0
#endif
#ifndef CV_AVX_512DQ
# define CV_AVX_512DQ 0
#endif
#ifndef CV_AVX_512ER
# define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA512
# define CV_AVX_512IFMA512 0
#endif
#ifndef CV_AVX_512PF
# define CV_AVX_512PF 0
#endif
#ifndef CV_AVX_512VBMI
# define CV_AVX_512VBMI 0
#endif
#ifndef CV_AVX_512VL
# define CV_AVX_512VL 0
#endif
#ifndef CV_AVX512_SKX
# define CV_AVX512_SKX 0
#endif
#ifndef CV_NEON
# define CV_NEON 0
#endif
#ifndef CV_VSX
# define CV_VSX 0
#endif
#ifndef CV_VSX3
# define CV_VSX3 0
#endif

@@ -0,0 +1,340 @@
// AUTOGENERATED, DO NOT EDIT
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
# define CV_TRY_SSE 1
# define CV_CPU_FORCE_SSE 1
# define CV_CPU_HAS_SUPPORT_SSE 1
# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
# define CV_TRY_SSE 1
# define CV_CPU_FORCE_SSE 0
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
#else
# define CV_TRY_SSE 0
# define CV_CPU_FORCE_SSE 0
# define CV_CPU_HAS_SUPPORT_SSE 0
# define CV_CPU_CALL_SSE(fn, args)
# define CV_CPU_CALL_SSE_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
# define CV_TRY_SSE2 1
# define CV_CPU_FORCE_SSE2 1
# define CV_CPU_HAS_SUPPORT_SSE2 1
# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
# define CV_TRY_SSE2 1
# define CV_CPU_FORCE_SSE2 0
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
#else
# define CV_TRY_SSE2 0
# define CV_CPU_FORCE_SSE2 0
# define CV_CPU_HAS_SUPPORT_SSE2 0
# define CV_CPU_CALL_SSE2(fn, args)
# define CV_CPU_CALL_SSE2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
# define CV_TRY_SSE3 1
# define CV_CPU_FORCE_SSE3 1
# define CV_CPU_HAS_SUPPORT_SSE3 1
# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
# define CV_TRY_SSE3 1
# define CV_CPU_FORCE_SSE3 0
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
#else
# define CV_TRY_SSE3 0
# define CV_CPU_FORCE_SSE3 0
# define CV_CPU_HAS_SUPPORT_SSE3 0
# define CV_CPU_CALL_SSE3(fn, args)
# define CV_CPU_CALL_SSE3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
# define CV_TRY_SSSE3 1
# define CV_CPU_FORCE_SSSE3 1
# define CV_CPU_HAS_SUPPORT_SSSE3 1
# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
# define CV_TRY_SSSE3 1
# define CV_CPU_FORCE_SSSE3 0
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
#else
# define CV_TRY_SSSE3 0
# define CV_CPU_FORCE_SSSE3 0
# define CV_CPU_HAS_SUPPORT_SSSE3 0
# define CV_CPU_CALL_SSSE3(fn, args)
# define CV_CPU_CALL_SSSE3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
# define CV_TRY_SSE4_1 1
# define CV_CPU_FORCE_SSE4_1 1
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
# define CV_TRY_SSE4_1 1
# define CV_CPU_FORCE_SSE4_1 0
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
#else
# define CV_TRY_SSE4_1 0
# define CV_CPU_FORCE_SSE4_1 0
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
# define CV_CPU_CALL_SSE4_1(fn, args)
# define CV_CPU_CALL_SSE4_1_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
# define CV_TRY_SSE4_2 1
# define CV_CPU_FORCE_SSE4_2 1
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
# define CV_TRY_SSE4_2 1
# define CV_CPU_FORCE_SSE4_2 0
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
#else
# define CV_TRY_SSE4_2 0
# define CV_CPU_FORCE_SSE4_2 0
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
# define CV_CPU_CALL_SSE4_2(fn, args)
# define CV_CPU_CALL_SSE4_2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
# define CV_TRY_POPCNT 1
# define CV_CPU_FORCE_POPCNT 1
# define CV_CPU_HAS_SUPPORT_POPCNT 1
# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
# define CV_TRY_POPCNT 1
# define CV_CPU_FORCE_POPCNT 0
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
#else
# define CV_TRY_POPCNT 0
# define CV_CPU_FORCE_POPCNT 0
# define CV_CPU_HAS_SUPPORT_POPCNT 0
# define CV_CPU_CALL_POPCNT(fn, args)
# define CV_CPU_CALL_POPCNT_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
# define CV_TRY_AVX 1
# define CV_CPU_FORCE_AVX 1
# define CV_CPU_HAS_SUPPORT_AVX 1
# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
# define CV_TRY_AVX 1
# define CV_CPU_FORCE_AVX 0
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
#else
# define CV_TRY_AVX 0
# define CV_CPU_FORCE_AVX 0
# define CV_CPU_HAS_SUPPORT_AVX 0
# define CV_CPU_CALL_AVX(fn, args)
# define CV_CPU_CALL_AVX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
# define CV_TRY_FP16 1
# define CV_CPU_FORCE_FP16 1
# define CV_CPU_HAS_SUPPORT_FP16 1
# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
# define CV_TRY_FP16 1
# define CV_CPU_FORCE_FP16 0
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
#else
# define CV_TRY_FP16 0
# define CV_CPU_FORCE_FP16 0
# define CV_CPU_HAS_SUPPORT_FP16 0
# define CV_CPU_CALL_FP16(fn, args)
# define CV_CPU_CALL_FP16_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
# define CV_TRY_AVX2 1
# define CV_CPU_FORCE_AVX2 1
# define CV_CPU_HAS_SUPPORT_AVX2 1
# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
# define CV_TRY_AVX2 1
# define CV_CPU_FORCE_AVX2 0
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
#else
# define CV_TRY_AVX2 0
# define CV_CPU_FORCE_AVX2 0
# define CV_CPU_HAS_SUPPORT_AVX2 0
# define CV_CPU_CALL_AVX2(fn, args)
# define CV_CPU_CALL_AVX2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
# define CV_TRY_FMA3 1
# define CV_CPU_FORCE_FMA3 1
# define CV_CPU_HAS_SUPPORT_FMA3 1
# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
# define CV_TRY_FMA3 1
# define CV_CPU_FORCE_FMA3 0
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
#else
# define CV_TRY_FMA3 0
# define CV_CPU_FORCE_FMA3 0
# define CV_CPU_HAS_SUPPORT_FMA3 0
# define CV_CPU_CALL_FMA3(fn, args)
# define CV_CPU_CALL_FMA3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
# define CV_TRY_AVX_512F 1
# define CV_CPU_FORCE_AVX_512F 1
# define CV_CPU_HAS_SUPPORT_AVX_512F 1
# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
# define CV_TRY_AVX_512F 1
# define CV_CPU_FORCE_AVX_512F 0
# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
#else
# define CV_TRY_AVX_512F 0
# define CV_CPU_FORCE_AVX_512F 0
# define CV_CPU_HAS_SUPPORT_AVX_512F 0
# define CV_CPU_CALL_AVX_512F(fn, args)
# define CV_CPU_CALL_AVX_512F_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_FORCE_AVX512_SKX 1
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_FORCE_AVX512_SKX 0
# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
#else
# define CV_TRY_AVX512_SKX 0
# define CV_CPU_FORCE_AVX512_SKX 0
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
# define CV_CPU_CALL_AVX512_SKX(fn, args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_TRY_NEON 1
# define CV_CPU_FORCE_NEON 1
# define CV_CPU_HAS_SUPPORT_NEON 1
# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
# define CV_TRY_NEON 1
# define CV_CPU_FORCE_NEON 0
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
#else
# define CV_TRY_NEON 0
# define CV_CPU_FORCE_NEON 0
# define CV_CPU_HAS_SUPPORT_NEON 0
# define CV_CPU_CALL_NEON(fn, args)
# define CV_CPU_CALL_NEON_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_FORCE_VSX 1
# define CV_CPU_HAS_SUPPORT_VSX 1
# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_FORCE_VSX 0
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
#else
# define CV_TRY_VSX 0
# define CV_CPU_FORCE_VSX 0
# define CV_CPU_HAS_SUPPORT_VSX 0
# define CV_CPU_CALL_VSX(fn, args)
# define CV_CPU_CALL_VSX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 1
# define CV_CPU_HAS_SUPPORT_VSX3 1
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
#else
# define CV_TRY_VSX3 0
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 0
# define CV_CPU_CALL_VSX3(fn, args)
# define CV_CPU_CALL_VSX3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
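// A usage sketch (hypothetical function name 'bar'): a dispatched translation
// unit declares the baseline and optimized variants, then drives the chain
// above. CV_CPU_DISPATCH appends END, so each CV_CPU_CALL_* either returns
// early or falls through to the next mode, with BASELINE last in the list:
//
//     namespace cpu_baseline { int bar(int a, int b); }
//     namespace opt_AVX2     { int bar(int a, int b); }
//
//     int bar(int a, int b)
//     {
//         CV_CPU_DISPATCH(bar, (a, b), AVX2, BASELINE);
//     }
//
// With CV_CPU_DISPATCH_COMPILE_AVX2 defined, the call expands roughly to:
//     if (cv::checkHardwareSupport(CV_CPU_AVX2)) return (opt_AVX2::bar(a, b));
//     return (cpu_baseline::bar(a, b));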

@@ -0,0 +1,753 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CVDEF_H
#define OPENCV_CORE_CVDEF_H
//! @addtogroup core_utils
//! @{
#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
(defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
// Guard to prevent using of binary incompatible binaries / runtimes
// https://github.com/opencv/opencv/pull/9161
#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
#define CV__DEBUG_NS_END }
namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
#endif
#endif
#ifndef CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_END
#endif
#ifdef __OPENCV_BUILD
#include "cvconfig.h"
#endif
#ifndef __CV_EXPAND
#define __CV_EXPAND(x) x
#endif
#ifndef __CV_CAT
#define __CV_CAT__(x, y) x ## y
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
#define __CV_CAT(x, y) __CV_CAT_(x, y)
#endif
#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
#define __CV_VA_NUM_ARGS(...) __CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
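// e.g. __CV_VA_NUM_ARGS(a, b, c) expands to 3: the three arguments shift the
// descending number list so that the literal 3 lands in the helper's N slot.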
#if defined __GNUC__
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#else
#define CV_Func ""
#endif
//! @cond IGNORED
//////////////// static assert /////////////////
#define CVAUX_CONCAT_EXP(a, b) a##b
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
#if defined(__clang__)
# ifndef __has_extension
# define __has_extension __has_feature /* compatibility, for older versions of clang */
# endif
# if __has_extension(cxx_static_assert)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# elif __has_extension(c_static_assert)
# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition)
# endif
#elif defined(__GNUC__)
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#elif defined(_MSC_VER)
# if _MSC_VER >= 1600 /* MSVC 10 */
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#endif
#ifndef CV_StaticAssert
# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
# else
template <bool x> struct CV_StaticAssert_failed;
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
template<int x> struct CV_StaticAssert_test {};
# define CV_StaticAssert(condition, reason)\
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
# endif
#endif
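// A usage sketch: CV_StaticAssert(sizeof(int) == 4, "int must be 32 bits");
// resolves to static_assert on C++11-capable compilers and to the
// sizeof-based template fallback above otherwise.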
// Suppress warning "-Wdeprecated-declarations" / C4996
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#else
#define CV_DO_PRAGMA(x)
#endif
#ifdef _MSC_VER
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(warning(push)) \
CV_DO_PRAGMA(warning(disable: 4996))
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(GCC diagnostic push) \
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
#else
#define CV_SUPPRESS_DEPRECATED_START
#define CV_SUPPRESS_DEPRECATED_END
#endif
#define CV_UNUSED(name) (void)name
#if defined __GNUC__ && !defined __EXCEPTIONS
#define CV_TRY
#define CV_CATCH(A, B) for (A B; false; )
#define CV_CATCH_ALL if (false)
#define CV_THROW(A) abort()
#define CV_RETHROW() abort()
#else
#define CV_TRY try
#define CV_CATCH(A, B) catch(const A & B)
#define CV_CATCH_ALL catch(...)
#define CV_THROW(A) throw A
#define CV_RETHROW() throw
#endif
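// A usage sketch (hypothetical risky()/handle() helpers):
//
//     CV_TRY
//     {
//         risky();                     // may CV_THROW(cv::Exception(...))
//     }
//     CV_CATCH(cv::Exception, e)
//     {
//         handle(e);
//     }
//
// Under GCC with -fno-exceptions the catch body becomes dead code and
// CV_THROW/CV_RETHROW abort() instead of throwing.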
//! @endcond
// undef problematic defines sometimes defined by system headers (windows.h in particular)
#undef small
#undef min
#undef max
#undef abs
#undef Complex
#include <limits.h>
#include "opencv2/core/hal/interface.h"
#if defined __ICL
# define CV_ICC __ICL
#elif defined __ICC
# define CV_ICC __ICC
#elif defined __ECL
# define CV_ICC __ECL
#elif defined __ECC
# define CV_ICC __ECC
#elif defined __INTEL_COMPILER
# define CV_ICC __INTEL_COMPILER
#endif
#ifndef CV_INLINE
# if defined __cplusplus
# define CV_INLINE static inline
# elif defined _MSC_VER
# define CV_INLINE __inline
# else
# define CV_INLINE static
# endif
#endif
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0
#else
# define CV_ENABLE_UNROLLED 1
#endif
#ifdef __GNUC__
# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
#elif defined _MSC_VER
# define CV_DECL_ALIGNED(x) __declspec(align(x))
#else
# define CV_DECL_ALIGNED(x)
#endif
/* CPU features and intrinsics support */
#define CV_CPU_NONE 0
#define CV_CPU_MMX 1
#define CV_CPU_SSE 2
#define CV_CPU_SSE2 3
#define CV_CPU_SSE3 4
#define CV_CPU_SSSE3 5
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_FP16 9
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_FMA3 12
#define CV_CPU_AVX_512F 13
#define CV_CPU_AVX_512BW 14
#define CV_CPU_AVX_512CD 15
#define CV_CPU_AVX_512DQ 16
#define CV_CPU_AVX_512ER 17
#define CV_CPU_AVX_512IFMA512 18 // deprecated
#define CV_CPU_AVX_512IFMA 18
#define CV_CPU_AVX_512PF 19
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
#define CV_CPU_NEON 100
#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
// CPU features groups
#define CV_CPU_AVX512_SKX 256
// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 512
/** @brief Available CPU features.
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_FP16 = 9,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18, // deprecated
CPU_AVX_512IFMA = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100,
CPU_VSX = 200,
CPU_VSX3 = 201,
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE
};
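// A usage sketch: these IDs feed the runtime query used by the dispatch
// macros, e.g.
//     if (cv::checkHardwareSupport(cv::CPU_AVX2))
//     {
//         // safe to take an AVX2 code path
//     }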
#include "cv_cpu_dispatch.h"
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
#define CV_2PI 6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818
#if defined __ARM_FP16_FORMAT_IEEE \
&& !defined __CUDACC__
# define CV_FP16_TYPE 1
#else
# define CV_FP16_TYPE 0
#endif
typedef union Cv16suf
{
short i;
ushort u;
#if CV_FP16_TYPE
__fp16 h;
#endif
}
Cv16suf;
typedef union Cv32suf
{
int i;
unsigned u;
float f;
}
Cv32suf;
typedef union Cv64suf
{
int64 i;
uint64 u;
double f;
}
Cv64suf;
#define OPENCV_ABI_COMPATIBILITY 300
#ifdef __OPENCV_BUILD
# define DISABLE_OPENCV_24_COMPATIBILITY
# define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
#endif
#ifdef CVAPI_EXPORTS
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__)
# define CV_EXPORTS __declspec(dllexport)
# elif defined __GNUC__ && __GNUC__ >= 4
# define CV_EXPORTS __attribute__ ((visibility ("default")))
# endif
#endif
#ifndef CV_EXPORTS
# define CV_EXPORTS
#endif
#ifdef _MSC_VER
# define CV_EXPORTS_TEMPLATE
#else
# define CV_EXPORTS_TEMPLATE CV_EXPORTS
#endif
#ifndef CV_DEPRECATED
# if defined(__GNUC__)
# define CV_DEPRECATED __attribute__ ((deprecated))
# elif defined(_MSC_VER)
# define CV_DEPRECATED __declspec(deprecated)
# else
# define CV_DEPRECATED
# endif
#endif
#ifndef CV_DEPRECATED_EXTERNAL
# if defined(__OPENCV_BUILD)
# define CV_DEPRECATED_EXTERNAL /* nothing */
# else
# define CV_DEPRECATED_EXTERNAL CV_DEPRECATED
# endif
#endif
#ifndef CV_EXTERN_C
# ifdef __cplusplus
# define CV_EXTERN_C extern "C"
# else
# define CV_EXTERN_C
# endif
#endif
/* special informative macros for wrapper generators */
#define CV_EXPORTS_W CV_EXPORTS
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
#define CV_EXPORTS_W_MAP CV_EXPORTS
#define CV_IN_OUT
#define CV_OUT
#define CV_PROP
#define CV_PROP_RW
#define CV_WRAP
#define CV_WRAP_AS(synonym)
/****************************************************************************************\
* Matrix type (Mat) *
\****************************************************************************************/
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
#define CV_MAT_CONT_FLAG_SHIFT 14
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
#define CV_SUBMAT_FLAG_SHIFT 15
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
#define CV_IS_SUBMAT(flags) ((flags) & CV_SUBMAT_FLAG)
/** Size of each channel item,
0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) \
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
#define CV_ELEM_SIZE(type) \
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
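// Worked example on a 64-bit build: the first table word is
// (8 << 28) | 0x8442211 = 0x88442211, the second is (8/4 + 1)*16384 | 0x3a50
// = 0xfa50. For CV_32FC3 (depth 5, 3 channels):
//   CV_ELEM_SIZE1(CV_32FC3) = (0x88442211 >> (5*4)) & 15   = 4   // bytes per channel
//   CV_ELEM_SIZE(CV_32FC3)  = 3 << ((0xfa50 >> (5*2)) & 3) = 12  // bytes per element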
#ifndef MIN
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
/****************************************************************************************\
* static analysis *
\****************************************************************************************/
// In practice, some macros are not processed correctly (noreturn is not detected).
// We need to use simplified definition for them.
#ifndef CV_STATIC_ANALYSIS
# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
# define CV_STATIC_ANALYSIS 1
# endif
#else
# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty
# if 0 == CV_STATIC_ANALYSIS
# undef CV_STATIC_ANALYSIS
# endif
# endif
#endif
/****************************************************************************************\
* Thread sanitizer *
\****************************************************************************************/
#ifndef CV_THREAD_SANITIZER
# if defined(__has_feature)
# if __has_feature(thread_sanitizer)
# define CV_THREAD_SANITIZER
# endif
# endif
#endif
/****************************************************************************************\
* exchange-add operation for atomic operations on reference counters *
\****************************************************************************************/
#ifdef CV_XADD
// allow to use user-defined macro
#elif defined __GNUC__ || defined __clang__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
# ifdef __ATOMIC_ACQ_REL
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
# endif
# else
# if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
# endif
# endif
#elif defined _MSC_VER && !defined RC_INVOKED
# include <intrin.h>
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#endif
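// A usage sketch: CV_XADD returns the value *before* the addition, so a
// reference-counted release in the style of Mat (hypothetical 'refcount'
// field) looks like:
//     if (CV_XADD(&refcount, -1) == 1)
//         deallocate();   // this caller dropped the last reference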
/****************************************************************************************\
* CV_NORETURN attribute *
\****************************************************************************************/
#ifndef CV_NORETURN
# if defined(__GNUC__)
# define CV_NORETURN __attribute__((__noreturn__))
# elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define CV_NORETURN __declspec(noreturn)
# else
# define CV_NORETURN /* nothing by default */
# endif
#endif
/****************************************************************************************\
* CV_NODISCARD attribute *
* encourages the compiler to issue a warning if the return value is discarded (C++17) *
\****************************************************************************************/
#ifndef CV_NODISCARD
# if defined(__GNUC__)
# define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4
# elif defined(__clang__) && defined(__has_attribute)
# if __has_attribute(__warn_unused_result__)
# define CV_NODISCARD __attribute__((__warn_unused_result__))
# endif
# endif
#endif
#ifndef CV_NODISCARD
# define CV_NODISCARD /* nothing by default */
#endif
/****************************************************************************************\
* C++ 11 *
\****************************************************************************************/
#ifndef CV_CXX11
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
# define CV_CXX11 1
# endif
#else
# if CV_CXX11 == 0
# undef CV_CXX11
# endif
#endif
/****************************************************************************************\
* C++ Move semantics *
\****************************************************************************************/
#ifndef CV_CXX_MOVE_SEMANTICS
# if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
# define CV_CXX_MOVE_SEMANTICS 1
# elif defined(__clang)
# if __has_feature(cxx_rvalue_references)
# define CV_CXX_MOVE_SEMANTICS 1
# endif
# endif
#else
# if CV_CXX_MOVE_SEMANTICS == 0
# undef CV_CXX_MOVE_SEMANTICS
# endif
#endif
/****************************************************************************************\
* C++11 std::array *
\****************************************************************************************/
#ifndef CV_CXX_STD_ARRAY
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
# define CV_CXX_STD_ARRAY 1
# include <array>
# endif
#else
# if CV_CXX_STD_ARRAY == 0
# undef CV_CXX_STD_ARRAY
# endif
#endif
/****************************************************************************************\
* C++11 override / final *
\****************************************************************************************/
#ifndef CV_OVERRIDE
# ifdef CV_CXX11
# define CV_OVERRIDE override
# endif
#endif
#ifndef CV_OVERRIDE
# define CV_OVERRIDE
#endif
#ifndef CV_FINAL
# ifdef CV_CXX11
# define CV_FINAL final
# endif
#endif
#ifndef CV_FINAL
# define CV_FINAL
#endif
// Integer types portability
#ifdef OPENCV_STDINT_HEADER
#include OPENCV_STDINT_HEADER
#elif defined(__cplusplus)
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
namespace cv {
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
}
#elif defined(_MSC_VER) || __cplusplus >= 201103L
#include <cstdint>
namespace cv {
using std::int8_t;
using std::uint8_t;
using std::int16_t;
using std::uint16_t;
using std::int32_t;
using std::uint32_t;
using std::int64_t;
using std::uint64_t;
}
#else
#include <stdint.h>
namespace cv {
typedef ::int8_t int8_t;
typedef ::uint8_t uint8_t;
typedef ::int16_t int16_t;
typedef ::uint16_t uint16_t;
typedef ::int32_t int32_t;
typedef ::uint32_t uint32_t;
typedef ::int64_t int64_t;
typedef ::uint64_t uint64_t;
}
#endif
#else // pure C
#include <stdint.h>
#endif
#ifdef __cplusplus
namespace cv
{
class float16_t
{
public:
#if CV_FP16_TYPE
float16_t() {}
explicit float16_t(float x) { h = (__fp16)x; }
operator float() const { return (float)h; }
static float16_t fromBits(ushort w)
{
Cv16suf u;
u.u = w;
float16_t result;
result.h = u.h;
return result;
}
static float16_t zero()
{
float16_t result;
result.h = (__fp16)0;
return result;
}
ushort bits() const
{
Cv16suf u;
u.h = h;
return u.u;
}
protected:
__fp16 h;
#else
float16_t() {}
explicit float16_t(float x)
{
#if CV_AVX2
__m128 v = _mm_load_ss(&x);
w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
#else
Cv32suf in;
in.f = x;
unsigned sign = in.u & 0x80000000;
in.u ^= sign;
if( in.u >= 0x47800000 )
w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
else
{
if (in.u < 0x38800000)
{
in.f += 0.5f;
w = (ushort)(in.u - 0x3f000000);
}
else
{
unsigned t = in.u + 0xc8000fff;
w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
}
}
w = (ushort)(w | (sign >> 16));
#endif
}
operator float() const
{
#if CV_AVX2
float f;
_mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
return f;
#else
Cv32suf out;
unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
unsigned sign = (w & 0x8000) << 16;
unsigned e = w & 0x7c00;
out.u = t + (1 << 23);
out.u = (e >= 0x7c00 ? t + 0x38000000 :
e == 0 ? (out.f -= 6.103515625e-05f, out.u) : t) | sign;
return out.f;
#endif
}
static float16_t fromBits(ushort b)
{
float16_t result;
result.w = b;
return result;
}
static float16_t zero()
{
float16_t result;
result.w = (ushort)0;
return result;
}
ushort bits() const { return w; }
protected:
ushort w;
#endif
};
}
#endif
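// A usage sketch: both branches above expose the same interface, e.g.
//     cv::float16_t h(0.5f);                          // pack to IEEE 754 binary16
//     float f = (float)h;                             // unpack; 0.5f survives exactly
//     ushort raw = h.bits();                          // 0x3800 for 0.5f
//     cv::float16_t z = cv::float16_t::fromBits(raw);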
//! @}
#ifndef __cplusplus
#include "opencv2/core/fast_math.hpp" // define cvRound(double)
#endif
#endif // OPENCV_CORE_CVDEF_H

@@ -0,0 +1,285 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CVSTDINL_HPP
#define OPENCV_CORE_CVSTDINL_HPP
#include <complex>
#include <ostream>
//! @cond IGNORED
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable: 4127 )
#endif
namespace cv
{
template<typename _Tp> class DataType< std::complex<_Tp> >
{
public:
typedef std::complex<_Tp> value_type;
typedef value_type work_type;
typedef _Tp channel_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 2,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKETYPE(depth, channels) };
typedef Vec<channel_type, channels> vec_type;
};
inline
String::String(const std::string& str)
: cstr_(0), len_(0)
{
size_t len = str.size();
if (len) memcpy(allocate(len), str.c_str(), len);
}
inline
String::String(const std::string& str, size_t pos, size_t len)
: cstr_(0), len_(0)
{
size_t strlen = str.size();
pos = min(pos, strlen);
len = min(strlen - pos, len);
if (!len) return;
memcpy(allocate(len), str.c_str() + pos, len);
}
inline
String& String::operator = (const std::string& str)
{
deallocate();
size_t len = str.size();
if (len) memcpy(allocate(len), str.c_str(), len);
return *this;
}
inline
String& String::operator += (const std::string& str)
{
*this = *this + str;
return *this;
}
inline
String::operator std::string() const
{
return std::string(cstr_, len_);
}
inline
String operator + (const String& lhs, const std::string& rhs)
{
String s;
size_t rhslen = rhs.size();
s.allocate(lhs.len_ + rhslen);
if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
if (rhslen) memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
return s;
}
inline
String operator + (const std::string& lhs, const String& rhs)
{
String s;
size_t lhslen = lhs.size();
s.allocate(lhslen + rhs.len_);
if (lhslen) memcpy(s.cstr_, lhs.c_str(), lhslen);
if (rhs.len_) memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
return s;
}
inline
FileNode::operator std::string() const
{
String value;
read(*this, value, value);
return value;
}
template<> inline
void operator >> (const FileNode& n, std::string& value)
{
read(n, value, std::string());
}
template<> inline
FileStorage& operator << (FileStorage& fs, const std::string& value)
{
return fs << cv::String(value);
}
static inline
std::ostream& operator << (std::ostream& os, const String& str)
{
return os << str.c_str();
}
static inline
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
{
fmtd->reset();
for(const char* str = fmtd->next(); str; str = fmtd->next())
out << str;
return out;
}
static inline
std::ostream& operator << (std::ostream& out, const Mat& mtx)
{
return out << Formatter::get()->format(mtx);
}
static inline
std::ostream& operator << (std::ostream& out, const UMat& m)
{
return out << m.getMat(ACCESS_READ);
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
{
return out << "(" << c.re << "," << c.im << ")";
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
{
return out << Formatter::get()->format(Mat(vec));
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
{
return out << Formatter::get()->format(Mat(vec));
}
template<typename _Tp, int m, int n> static inline
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
{
return out << Formatter::get()->format(Mat(matx));
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
{
out << "[" << p.x << ", " << p.y << "]";
return out;
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
{
out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
return out;
}
template<typename _Tp, int n> static inline
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
{
out << "[";
if (cv::traits::Depth<_Tp>::value <= CV_32S)
{
for (int i = 0; i < n - 1; ++i) {
out << (int)vec[i] << ", ";
}
out << (int)vec[n-1] << "]";
}
else
{
for (int i = 0; i < n - 1; ++i) {
out << vec[i] << ", ";
}
out << vec[n-1] << "]";
}
return out;
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
{
return out << "[" << size.width << " x " << size.height << "]";
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
{
return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
}
static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
{
int i, dims = msize.dims();
for( i = 0; i < dims; i++ )
{
out << msize[i];
if( i < dims-1 )
out << " x ";
}
return out;
}
static inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
{
return s << "[" << r.start << " : " << r.end << ")";
}
} // cv
#ifdef _MSC_VER
#pragma warning( pop )
#endif
//! @endcond
#endif // OPENCV_CORE_CVSTDINL_HPP
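// A usage sketch: with the overloads above, OpenCV types stream directly:
//     cv::Mat m = cv::Mat::eye(2, 2, CV_32F);
//     std::cout << m << std::endl;                 // formatted matrix dump
//     std::cout << cv::Point2f(1.f, 2.f) << "\n";  // prints [1, 2]
//     std::cout << cv::Rect(0, 0, 4, 3) << "\n";   // prints [4 x 3 from (0, 0)]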

@ -0,0 +1,184 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_DIRECTX_HPP
#define OPENCV_CORE_DIRECTX_HPP
#include "mat.hpp"
#include "ocl.hpp"
#if !defined(__d3d11_h__)
struct ID3D11Device;
struct ID3D11Texture2D;
#endif
#if !defined(__d3d10_h__)
struct ID3D10Device;
struct ID3D10Texture2D;
#endif
#if !defined(_D3D9_H_)
struct IDirect3DDevice9;
struct IDirect3DDevice9Ex;
struct IDirect3DSurface9;
#endif
namespace cv { namespace directx {
namespace ocl {
using namespace cv::ocl;
//! @addtogroup core_directx
// This section describes OpenCL and DirectX interoperability.
//
// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON. Note that DirectX is
// supported only on Windows.
//
// To use the OpenCL functionality, first initialize an OpenCL context from a DirectX resource.
//
//! @{
// TODO static functions in the Context class
//! @brief Creates OpenCL context from D3D11 device
//
//! @param pD3D11Device - pointer to D3D11 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
//! @brief Creates OpenCL context from D3D10 device
//
//! @param pD3D10Device - pointer to D3D10 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
//! @brief Creates OpenCL context from Direct3DDevice9Ex device
//
//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
//! @brief Creates OpenCL context from Direct3DDevice9 device
//
//! @param pDirect3DDevice9 - pointer to Direct3DDevice9 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
//! @}
} // namespace cv::directx::ocl
//! @addtogroup core_directx
//! @{
//! @brief Converts InputArray to ID3D11Texture2D. If the destination texture format is DXGI_FORMAT_NV12 then
//! the input UMat is expected to be in BGR format and the data will be downsampled and color-converted to NV12.
//
//! @note The destination texture must be allocated by the application. The function performs a memory copy
//! from src to pD3D11Texture2D.
//
//! @param src - source InputArray
//! @param pD3D11Texture2D - destination D3D11 texture
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
//! @brief Converts ID3D11Texture2D to OutputArray. If the input texture format is DXGI_FORMAT_NV12 then
//! the data will be upsampled and color-converted to BGR format.
//
//! @note The destination matrix will be re-allocated if it does not have enough memory to match the texture size.
//! The function performs a memory copy from pD3D11Texture2D to dst.
//
//! @param pD3D11Texture2D - source D3D11 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
//! @brief Converts InputArray to ID3D10Texture2D
//
//! @note The function performs a memory copy from src to
//! pD3D10Texture2D.
//
//! @param src - source InputArray
//! @param pD3D10Texture2D - destination D3D10 texture
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
//! @brief Converts ID3D10Texture2D to OutputArray
//
//! @note The function performs a memory copy from pD3D10Texture2D
//! to dst.
//
//! @param pD3D10Texture2D - source D3D10 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
//! @brief Converts InputArray to IDirect3DSurface9
//
//! @note The function performs a memory copy from src to
//! pDirect3DSurface9.
//
//! @param src - source InputArray
//! @param pDirect3DSurface9 - destination D3D9 surface
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
//! @brief Converts IDirect3DSurface9 to OutputArray
//
//! @note The function performs a memory copy from pDirect3DSurface9
//! to dst.
//
//! @param pDirect3DSurface9 - source D3D9 surface
//! @param dst - destination OutputArray
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
//! @brief Get OpenCV type from DirectX type
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
//! @brief Get OpenCV type from DirectX type
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
//! @}
} } // namespace cv::directx
#endif // OPENCV_CORE_DIRECTX_HPP
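// A usage sketch (Windows only; hypothetical 'dev' and 'tex' pointers, and
// assumes OpenCV was built with WITH_DIRECTX=ON):
//     ID3D11Device* dev = ...;       // application-created D3D11 device
//     ID3D11Texture2D* tex = ...;    // application-allocated destination
//     cv::directx::ocl::initializeContextFromD3D11Device(dev);
//     cv::UMat frame;                // produced by some OpenCL processing
//     cv::directx::convertToD3D11Texture2D(frame, tex);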

@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_EIGEN_HPP
#define OPENCV_CORE_EIGEN_HPP
#include "opencv2/core.hpp"
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
#endif
namespace cv
{
//! @addtogroup core_eigen
//! @{
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{
if( !(src.Flags & Eigen::RowMajorBit) )
{
Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value,
(void*)src.data(), src.outerStride()*sizeof(_Tp));
transpose(_src, dst);
}
else
{
Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value,
(void*)src.data(), src.outerStride()*sizeof(_Tp));
_src.copyTo(dst);
}
}
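// A minimal usage sketch (assumes Eigen is included before this header; the
// names below are illustrative):
//
//     Eigen::Matrix3d em = Eigen::Matrix3d::Identity();
//     cv::Mat m;
//     cv::eigen2cv(em, m);   // m becomes a 3x3 CV_64F matrix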
// Matx case
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
Matx<_Tp, _rows, _cols>& dst )
{
if( !(src.Flags & Eigen::RowMajorBit) )
{
dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
}
else
{
dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
}
}
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
CV_DbgAssert(src.rows == _rows && src.cols == _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(src.rows, src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
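// The reverse direction, as a sketch (when src does not already hold _Tp the
// data is converted with convertTo):
//
//     cv::Mat m = cv::Mat::eye(3, 3, CV_64F);
//     Eigen::MatrixXd em;
//     cv::cv2eigen(m, em);   // em is resized to 3x3 and filled from m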
// Matx case
template<typename _Tp, int _rows, int _cols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(_rows, _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
CV_Assert(src.cols == 1);
dst.resize(src.rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows> static inline
void cv2eigen( const Matx<_Tp, _rows, 1>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
dst.resize(_rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(1, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
CV_Assert(src.rows == 1);
dst.resize(src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _cols> static inline
void cv2eigen( const Matx<_Tp, 1, _cols>& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
dst.resize(_cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, 1, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(1, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
//! @}
} // cv
#endif

View File

@ -0,0 +1,271 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_FAST_MATH_HPP
#define OPENCV_CORE_FAST_MATH_HPP
#include "opencv2/core/cvdef.h"
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif
//! @addtogroup core_utils
//! @{
/****************************************************************************************\
* fast math *
\****************************************************************************************/
#ifdef __cplusplus
# include <cmath>
#else
# ifdef __BORLANDC__
# include <fastmath.h>
# else
# include <math.h>
# endif
#endif
#ifdef HAVE_TEGRA_OPTIMIZATION
# include "tegra_round.hpp"
#endif
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
int res; \
float temp; \
CV_UNUSED(temp); \
__asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
return res
// 2. version for double
#ifdef __clang__
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
#else
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
#endif
// 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#endif
/** @brief Rounds a floating-point number to the nearest integer
@param value floating-point number. If the value is outside of the INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_DBL
ARM_ROUND_DBL(value);
# else
return (int)lrint(value);
# endif
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5 : -0.5));
#endif
}
/** @brief Rounds a floating-point number to the nearest integer not larger than the original.
The function computes an integer i such that:
\f[i \le \texttt{value} < i+1\f]
@param value floating-point number. If the value is outside of the INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvFloor( double value )
{
int i = (int)value;
return i - (i > value);
}
/** @brief Rounds a floating-point number to the nearest integer not smaller than the original.
The function computes an integer i such that:
\f[i-1 < \texttt{value} \le i\f]
@param value floating-point number. If the value is outside of the INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvCeil( double value )
{
int i = (int)value;
return i + (i < value);
}
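// For example (the half-way behaviour of cvRound depends on the branch taken:
// round-to-nearest-even on SSE2, plain +/-0.5 rounding in the portable fallback):
//
//     cvRound(2.5);   // 2 on SSE2 (banker's rounding), 3 in the fallback
//     cvFloor(-1.2);  // -2
//     cvCeil(-1.2);   // -1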
/** @brief Determines if the argument is Not A Number.
@param value The input floating-point value
The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
otherwise. */
CV_INLINE int cvIsNaN( double value )
{
Cv64suf ieee754;
ieee754.f = value;
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
((unsigned)ieee754.u != 0) > 0x7ff00000;
}
/** @brief Determines if the argument is Infinity.
@param value The input floating-point value
The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
and 0 otherwise. */
CV_INLINE int cvIsInf( double value )
{
Cv64suf ieee754;
ieee754.f = value;
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
(unsigned)ieee754.u == 0;
}
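// Both predicates test the raw IEEE-754 bits: an all-ones exponent means NaN when
// the mantissa is non-zero and +/-infinity when it is zero. So, assuming IEC 559
// doubles, cvIsNaN(0.0 / 0.0) == 1 and cvIsInf(1.0 / 0.0) == 1.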
#ifdef __cplusplus
/** @overload */
CV_INLINE int cvRound(float value)
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_FLT
ARM_ROUND_FLT(value);
# else
return (int)lrintf(value);
# endif
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5f : -0.5f));
#endif
}
/** @overload */
CV_INLINE int cvRound( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvFloor( float value )
{
int i = (int)value;
return i - (i > value);
}
/** @overload */
CV_INLINE int cvFloor( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvCeil( float value )
{
int i = (int)value;
return i + (i < value);
}
/** @overload */
CV_INLINE int cvCeil( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvIsNaN( float value )
{
Cv32suf ieee754;
ieee754.f = value;
return (ieee754.u & 0x7fffffff) > 0x7f800000;
}
/** @overload */
CV_INLINE int cvIsInf( float value )
{
Cv32suf ieee754;
ieee754.f = value;
return (ieee754.u & 0x7fffffff) == 0x7f800000;
}
#endif // __cplusplus
//! @} core_utils
#endif

View File

@ -0,0 +1,250 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_HPP
#define OPENCV_HAL_HPP
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/core/hal/interface.h"
namespace cv { namespace hal {
//! @addtogroup core_hal_functions
//! @{
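// Hamming norms over n bytes: the one-array forms count the set bits of a, the
// two-array forms count the bits in which a and b differ; in the cellSize
// overloads (cellSize is 1, 2 or 4) each group of cellSize bits is counted as
// one unit, matching NORM_HAMMING / NORM_HAMMING2.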
CV_EXPORTS int normHamming(const uchar* a, int n);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
CV_EXPORTS void exp32f(const float* src, float* dst, int n);
CV_EXPORTS void exp64f(const double* src, double* dst, int n);
CV_EXPORTS void log32f(const float* src, float* dst, int n);
CV_EXPORTS void log64f(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
struct CV_EXPORTS DFT1D
{
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
virtual void apply(const uchar *src, uchar *dst) = 0;
virtual ~DFT1D() {}
};
struct CV_EXPORTS DFT2D
{
static Ptr<DFT2D> create(int width, int height, int depth,
int src_channels, int dst_channels,
int flags, int nonzero_rows = 0);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DFT2D() {}
};
struct CV_EXPORTS DCT2D
{
static Ptr<DCT2D> create(int width, int height, int depth, int flags);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DCT2D() {}
};
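// A hedged usage sketch of the 2D DFT interface (the dimensions, depth and the
// src/dst buffers below are illustrative):
//
//     cv::Ptr<cv::hal::DFT2D> dft = cv::hal::DFT2D::create(
//         640, 480, CV_32F, /*src_channels=*/2, /*dst_channels=*/2, /*flags=*/0);
//     dft->apply(src_data, src_step, dst_data, dst_step);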
//! @} core_hal_functions
//=============================================================================
// for binary compatibility with 3.0
//! @cond IGNORED
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void exp(const float* src, float* dst, int n);
CV_EXPORTS void exp(const double* src, double* dst, int n);
CV_EXPORTS void log(const float* src, float* dst, int n);
CV_EXPORTS void log(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt(const float* src, float* dst, int len);
CV_EXPORTS void sqrt(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
//! @endcond
}} //cv::hal
#endif //OPENCV_HAL_HPP

View File

@ -0,0 +1,182 @@
#ifndef OPENCV_CORE_HAL_INTERFACE_H
#define OPENCV_CORE_HAL_INTERFACE_H
//! @addtogroup core_hal_interface
//! @{
//! @name Return codes
//! @{
#define CV_HAL_ERROR_OK 0
#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
#define CV_HAL_ERROR_UNKNOWN -1
//! @}
#ifdef __cplusplus
#include <cstddef>
#else
#include <stddef.h>
#include <stdbool.h>
#endif
//! @name Data types
//! primitive types
//! - schar - signed 1 byte integer
//! - uchar - unsigned 1 byte integer
//! - short - signed 2 byte integer
//! - ushort - unsigned 2 byte integer
//! - int - signed 4 byte integer
//! - uint - unsigned 4 byte integer
//! - int64 - signed 8 byte integer
//! - uint64 - unsigned 8 byte integer
//! @{
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
# include <cstdint>
# ifdef __NEWLIB__
typedef unsigned int uint;
# else
typedef std::uint32_t uint;
# endif
# else
# include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
# define CV_BIG_INT(n) n##I64
# define CV_BIG_UINT(n) n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
# define CV_BIG_INT(n) n##LL
# define CV_BIG_UINT(n) n##ULL
#endif
#define CV_CN_MAX 512
#define CV_CN_SHIFT 3
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
#define CV_8U 0
#define CV_8S 1
#define CV_16U 2
#define CV_16S 3
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
#define CV_USRTYPE1 7
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
#define CV_MAKE_TYPE CV_MAKETYPE
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
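// For example, CV_MAKETYPE packs depth and channel count into a single constant:
//     CV_8UC3  == CV_MAKETYPE(CV_8U, 3)  == 0 + ((3-1) << CV_CN_SHIFT) == 16
//     CV_32FC2 == CV_MAKETYPE(CV_32F, 2) == 5 + ((2-1) << CV_CN_SHIFT) == 13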
//! @}
//! @name Comparison operation
//! @sa cv::CmpTypes
//! @{
#define CV_HAL_CMP_EQ 0
#define CV_HAL_CMP_GT 1
#define CV_HAL_CMP_GE 2
#define CV_HAL_CMP_LT 3
#define CV_HAL_CMP_LE 4
#define CV_HAL_CMP_NE 5
//! @}
//! @name Border processing modes
//! @sa cv::BorderTypes
//! @{
#define CV_HAL_BORDER_CONSTANT 0
#define CV_HAL_BORDER_REPLICATE 1
#define CV_HAL_BORDER_REFLECT 2
#define CV_HAL_BORDER_WRAP 3
#define CV_HAL_BORDER_REFLECT_101 4
#define CV_HAL_BORDER_TRANSPARENT 5
#define CV_HAL_BORDER_ISOLATED 16
//! @}
//! @name DFT flags
//! @{
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
//! @}
//! @name SVD flags
//! @{
#define CV_HAL_SVD_NO_UV 1
#define CV_HAL_SVD_SHORT_UV 2
#define CV_HAL_SVD_MODIFY_A 4
#define CV_HAL_SVD_FULL_UV 8
//! @}
//! @name Gemm flags
//! @{
#define CV_HAL_GEMM_1_T 1
#define CV_HAL_GEMM_2_T 2
#define CV_HAL_GEMM_3_T 4
//! @}
//! @}
#endif

View File

@ -0,0 +1,420 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP
#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)
// unlike the HAL API, which lives in cv::hal,
// we put the intrinsics directly into the cv namespace
// so they are easier to access from within OpenCV code
namespace cv {
namespace hal {
enum StoreMode
{
STORE_UNALIGNED = 0,
STORE_ALIGNED = 1,
STORE_ALIGNED_NOCACHE = 2
};
}
template<typename _Tp> struct V_TypeTraits
{
};
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
template<> struct V_TypeTraits<type> \
{ \
typedef type value_type; \
typedef int_type_ int_type; \
typedef abs_type_ abs_type; \
typedef uint_type_ uint_type; \
typedef w_type_ w_type; \
typedef q_type_ q_type; \
typedef sum_type_ sum_type; \
enum { nlanes128 = nlanes128_ }; \
\
static inline int_type reinterpret_int(type x) \
{ \
union { type l; int_type i; } v; \
v.l = x; \
return v.i; \
} \
\
static inline type reinterpret_from_int(int_type x) \
{ \
union { type l; int_type i; } v; \
v.i = x; \
return v.l; \
} \
}
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
CV_INTRIN_DEF_TYPE_TRAITS(unsigned, int, unsigned, unsigned, uint64, void, unsigned, 4);
CV_INTRIN_DEF_TYPE_TRAITS(int, int, unsigned, unsigned, int64, void, int, 4);
CV_INTRIN_DEF_TYPE_TRAITS(float, int, unsigned, float, double, void, float, 4);
CV_INTRIN_DEF_TYPE_TRAITS(uint64, int64, uint64, uint64, void, void, uint64, 2);
CV_INTRIN_DEF_TYPE_TRAITS(int64, int64, uint64, uint64, void, void, int64, 2);
CV_INTRIN_DEF_TYPE_TRAITS(double, int64, uint64, double, void, void, double, 2);
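// For instance, V_TypeTraits<float>::reinterpret_int(1.0f) returns the IEEE-754
// bit pattern 0x3f800000 as an int, and reinterpret_from_int(0x3f800000) yields
// 1.0f again.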
#ifndef CV_DOXYGEN
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}
#ifdef CV_DOXYGEN
# undef CV_AVX2
# undef CV_SSE2
# undef CV_NEON
# undef CV_VSX
# undef CV_FP16
#endif
#if CV_SSE2 || CV_NEON || CV_VSX
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
#if CV_SSE2
#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"
#elif CV_NEON
#include "opencv2/core/hal/intrin_neon.hpp"
#elif CV_VSX
#include "opencv2/core/hal/intrin_vsx.hpp"
#else
#define CV_SIMD128_CPP 1
#include "opencv2/core/hal/intrin_cpp.hpp"
#endif
// AVX2 can be used together with SSE2, so
// we define those two sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which map to the "widest"
// available instruction set) get the vx_ prefix and resolve to the
// v256_ counterparts (e.g. vx_load() => v256_load())
#if CV_AVX2
#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"
#endif
//! @cond IGNORED
namespace cv {
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif
#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif
#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif
#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif
#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif
#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif
#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif
#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif
#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif
//==================================================================================================
#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
template<typename _Tp> struct V_RegTraits
{
};
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
template<> struct V_RegTraits<_reg> \
{ \
typedef _reg reg; \
typedef _u_reg u_reg; \
typedef _w_reg w_reg; \
typedef _q_reg q_reg; \
typedef _int_reg int_reg; \
typedef _round_reg round_reg; \
}
#if CV_SIMD128 || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif
#if CV_SIMD256
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
#define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD512_64F
#define CV_SIMD_WIDTH 64
// TODO typedef v_uint8 / v_int32 / etc types here
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
#define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD256_64F
#define CV_SIMD_FP16 CV_SIMD256_FP16
#define CV_SIMD_WIDTH 32
typedef v_uint8x32 v_uint8;
typedef v_int8x32 v_int8;
typedef v_uint16x16 v_uint16;
typedef v_int16x16 v_int16;
typedef v_uint32x8 v_uint32;
typedef v_int32x8 v_int32;
typedef v_uint64x4 v_uint64;
typedef v_int64x4 v_int64;
typedef v_float32x8 v_float32;
#if CV_SIMD256_64F
typedef v_float64x4 v_float64;
#endif
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
inline void vx_cleanup() { v256_cleanup(); }
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
#define CV__SIMD_NAMESPACE simd128
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD CV_SIMD128
#define CV_SIMD_64F CV_SIMD128_64F
#define CV_SIMD_WIDTH 16
typedef v_uint8x16 v_uint8;
typedef v_int8x16 v_int8;
typedef v_uint16x8 v_uint16;
typedef v_int16x8 v_int16;
typedef v_uint32x4 v_uint32;
typedef v_int32x4 v_int32;
typedef v_uint64x2 v_uint64;
typedef v_int64x2 v_int64;
typedef v_float32x4 v_float32;
#if CV_SIMD128_64F
typedef v_float64x2 v_float64;
#endif
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
#if CV_SIMD128_64F
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
#endif
inline void vx_cleanup() { v_cleanup(); }
} // namespace
using namespace CV__SIMD_NAMESPACE;
#endif
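// A width-agnostic usage sketch of the "vx_" API selected above (the buffers,
// and the assumption that 16 is a multiple of the lane count, are illustrative):
//
//     float a[16], b[16], c[16];
//     for (int i = 0; i < 16; i += v_float32::nlanes)
//     {
//         v_float32 va = vx_load(a + i), vb = vx_load(b + i);
//         vx_store(c + i, va + vb);   // element-wise add, CV_SIMD_WIDTH bytes at a time
//     }
//     vx_cleanup();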
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM)
unsigned long index = 0;
_BitScanForward(&index, value);
return (unsigned int)index;
#elif defined(__clang__)
// clang-cl doesn't export _tzcnt_u32 for non BMI systems
return value ? __builtin_ctz(value) : 32;
#else
return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
return _bit_scan_forward(value);
#elif defined(__clang__)
return value ? __builtin_ctz(value) : 32;  // clang provides the GCC builtin; ctz(0) is undefined
#else
static const int MultiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
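// e.g. trailingZeros32(0x8) == 3 and trailingZeros32(1) == 0; the result for
// value == 0 is branch-dependent (undefined for the GCC builtin, 32 where
// tzcnt-style semantics apply), so callers should not pass 0.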
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif
#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif
#ifndef CV_SIMD
#define CV_SIMD 0
#endif
} // cv::
//! @endcond
#endif

View File

@ -0,0 +1,158 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef CV__SIMD_FORWARD
#error "Need to pre-define forward width"
#endif
namespace cv
{
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
/** Types **/
#if CV__SIMD_FORWARD == 512
// [todo] 512
#error "AVX512 Not implemented yet"
#elif CV__SIMD_FORWARD == 256
// 256
#define __CV_VX(fun) v256_##fun
#define __CV_V_UINT8 v_uint8x32
#define __CV_V_INT8 v_int8x32
#define __CV_V_UINT16 v_uint16x16
#define __CV_V_INT16 v_int16x16
#define __CV_V_UINT32 v_uint32x8
#define __CV_V_INT32 v_int32x8
#define __CV_V_UINT64 v_uint64x4
#define __CV_V_INT64 v_int64x4
#define __CV_V_FLOAT32 v_float32x8
#define __CV_V_FLOAT64 v_float64x4
struct v_uint8x32;
struct v_int8x32;
struct v_uint16x16;
struct v_int16x16;
struct v_uint32x8;
struct v_int32x8;
struct v_uint64x4;
struct v_int64x4;
struct v_float32x8;
struct v_float64x4;
#else
// 128
#define __CV_VX(fun) v_##fun
#define __CV_V_UINT8 v_uint8x16
#define __CV_V_INT8 v_int8x16
#define __CV_V_UINT16 v_uint16x8
#define __CV_V_INT16 v_int16x8
#define __CV_V_UINT32 v_uint32x4
#define __CV_V_INT32 v_int32x4
#define __CV_V_UINT64 v_uint64x2
#define __CV_V_INT64 v_int64x2
#define __CV_V_FLOAT32 v_float32x4
#define __CV_V_FLOAT64 v_float64x2
struct v_uint8x16;
struct v_int8x16;
struct v_uint16x8;
struct v_int16x8;
struct v_uint32x4;
struct v_int32x4;
struct v_uint64x2;
struct v_int64x2;
struct v_float32x4;
struct v_float64x2;
#endif
/** Value reordering **/
// Expansion
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
// Low Expansion
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
// High Expansion
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
// Load & Low Expansion
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
__CV_V_INT32 __CV_VX(load_expand)(const short*);
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
__CV_V_INT64 __CV_VX(load_expand)(const int*);
// Load lower 8-bit and expand into 32-bit
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
// Saturating Pack
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
// Non-saturating Pack
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
// Pack signed integers with unsigned saturation
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
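// A 128-bit sketch of the reordering pairs above (ptr is an illustrative
// uchar buffer holding at least 16 elements):
//
//     v_uint8x16 a = v_load(ptr);
//     v_uint16x8 lo, hi;
//     v_expand(a, lo, hi);             // 16 x u8 -> two registers of 8 x u16
//     v_uint8x16 b = v_pack(lo, hi);   // saturating pack; here b == a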
/** Arithmetic, bitwise and comparison operations **/
// Non-saturating multiply
#if CV_VSX
template<typename Tvec>
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
#else
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
#endif
// Multiply and expand
#if CV_VSX
template<typename Tvec, typename Twvec>
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
#else
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
#endif
/** Cleanup **/
#undef CV__SIMD_FORWARD
#undef __CV_VX
#undef __CV_V_UINT8
#undef __CV_V_INT8
#undef __CV_V_UINT16
#undef __CV_V_INT16
#undef __CV_V_UINT32
#undef __CV_V_INT32
#undef __CV_V_UINT64
#undef __CV_V_INT64
#undef __CV_V_FLOAT32
#undef __CV_V_FLOAT64
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
} // cv::

Some files were not shown because too many files have changed in this diff.